[feature](join) support nested loop semi/anti join (#14227)

This commit is contained in:
Gabriel
2022-11-17 22:20:08 +08:00
committed by GitHub
parent d5af4f6558
commit 50bfd99b59
22 changed files with 394 additions and 390 deletions

View File

@ -233,7 +233,6 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const Descr
_arena = std::make_unique<Arena>();
_hash_table_variants = std::make_unique<HashTableVariants>();
_process_hashtable_ctx_variants = std::make_unique<HashTableCtxVariants>();
_init_join_op();
// avoid vector expand change block address.
// one block can store 4g data, _build_blocks can store 128*4g data.
@ -278,7 +277,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
for (size_t i = 0; i < _probe_expr_ctxs.size(); ++i) {
_probe_ignore_null |= !probe_not_ignore_null[i];
}
_short_circuit_for_null_in_build_side = _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
_probe_column_disguise_null.reserve(eq_join_conjuncts.size());

View File

@ -254,12 +254,6 @@ private:
Sizes _probe_key_sz;
Sizes _build_key_sz;
// For null aware left anti join, we apply a short circuit strategy.
// 1. Set _short_circuit_for_null_in_build_side to true if join operator is null aware left anti join.
// 2. In build phase, we stop building hash table when we meet the first null value and set _short_circuit_for_null_in_probe_side to true.
// 3. In probe phase, if _short_circuit_for_null_in_probe_side is true, join node returns empty block directly. Otherwise, probing will continue as the same as generic left anti join.
bool _short_circuit_for_null_in_build_side = false;
bool _short_circuit_for_null_in_probe_side = false;
bool _is_broadcast_join = false;
SharedHashTableController* _shared_hashtable_controller = nullptr;
VRuntimeFilterSlots* _runtime_filter_slots;

View File

@ -44,7 +44,12 @@ VJoinNodeBase::VJoinNodeBase(ObjectPool* pool, const TPlanNode& tnode, const Des
_join_op == TJoinOp::LEFT_SEMI_JOIN)),
_is_right_semi_anti(_join_op == TJoinOp::RIGHT_ANTI_JOIN ||
_join_op == TJoinOp::RIGHT_SEMI_JOIN),
_is_outer_join(_match_all_build || _match_all_probe) {
_is_left_semi_anti(_join_op == TJoinOp::LEFT_ANTI_JOIN ||
_join_op == TJoinOp::LEFT_SEMI_JOIN ||
_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN),
_is_outer_join(_match_all_build || _match_all_probe),
_short_circuit_for_null_in_build_side(_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
_init_join_op();
if (tnode.__isset.hash_join_node) {
_output_row_desc.reset(
new RowDescriptor(descs, {tnode.hash_join_node.voutput_tuple_id}, {false}));

View File

@ -80,8 +80,16 @@ protected:
bool _build_unique; // build a hash table without duplicated rows. Left semi/anti Join
const bool _is_right_semi_anti;
const bool _is_left_semi_anti;
const bool _is_outer_join;
// For null aware left anti join, we apply a short circuit strategy.
// 1. Set _short_circuit_for_null_in_build_side to true if join operator is null aware left anti join.
// 2. In build phase, we stop materialize build side when we meet the first null value and set _short_circuit_for_null_in_probe_side to true.
// 3. In probe phase, if _short_circuit_for_null_in_probe_side is true, join node returns empty block directly. Otherwise, probing will continue as the same as generic left anti join.
const bool _short_circuit_for_null_in_build_side = false;
bool _short_circuit_for_null_in_probe_side = false;
std::unique_ptr<RowDescriptor> _output_row_desc;
std::unique_ptr<RowDescriptor> _intermediate_row_desc;
// output expr

View File

@ -43,9 +43,6 @@ VNestedLoopJoinNode::VNestedLoopJoinNode(ObjectPool* pool, const TPlanNode& tnod
_old_version_flag(!tnode.__isset.nested_loop_join_node) {}
Status VNestedLoopJoinNode::prepare(RuntimeState* state) {
DCHECK(_join_op == TJoinOp::CROSS_JOIN || _join_op == TJoinOp::INNER_JOIN ||
_join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::RIGHT_OUTER_JOIN ||
_join_op == TJoinOp::FULL_OUTER_JOIN);
SCOPED_TIMER(_runtime_profile->total_time_counter());
RETURN_IF_ERROR(VJoinNodeBase::prepare(state));
SCOPED_CONSUME_MEM_TRACKER(mem_tracker());
@ -99,7 +96,7 @@ Status VNestedLoopJoinNode::_materialize_build_side(RuntimeState* state) {
_build_rows += rows;
_total_mem_usage += mem_usage;
_build_blocks.emplace_back(std::move(block));
if (_match_all_build) {
if (_match_all_build || _is_right_semi_anti) {
_build_side_visited_flags.emplace_back(ColumnUInt8::create(rows, 0));
}
}
@ -134,6 +131,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo
std::stack<uint16_t> offset_stack;
RETURN_IF_ERROR(std::visit(
[&](auto&& join_op_variants, auto set_build_side_flag, auto set_probe_side_flag) {
using JoinOpType = std::decay_t<decltype(join_op_variants)>;
while (mutable_join_block.rows() < state->batch_size() && !_matched_rows_done) {
// If this left block is exhausted or empty, we need to pull data from left child.
if (_left_block_pos == _left_block.rows()) {
@ -173,7 +171,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo
Block tmp_block = mutable_join_block.to_block(0);
Status status = _do_filtering_and_update_visited_flags<set_build_side_flag,
set_probe_side_flag>(
&tmp_block, offset_stack);
&tmp_block, offset_stack, !_is_left_semi_anti);
if (!status.OK()) {
return status;
}
@ -184,7 +182,9 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo
// probe row with null from build side.
if (_current_build_pos == _build_blocks.size()) {
if (!_matched_rows_done) {
_output_null_data<false>(dst_columns, state->batch_size());
_finalize_current_phase<false, JoinOpType::value ==
TJoinOp::LEFT_SEMI_JOIN>(
dst_columns, state->batch_size());
_reset_with_next_probe_row(dst_columns);
}
break;
@ -199,7 +199,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo
Block tmp_block = mutable_join_block.to_block(0);
Status status = _do_filtering_and_update_visited_flags<set_build_side_flag,
set_probe_side_flag>(
&tmp_block, offset_stack);
&tmp_block, offset_stack, !_is_right_semi_anti);
mutable_join_block = MutableBlock(std::move(tmp_block));
if (!status.OK()) {
return status;
@ -209,13 +209,15 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo
if constexpr (set_build_side_flag) {
if (_matched_rows_done && _output_null_idx_build_side < _build_blocks.size()) {
auto& cols = mutable_join_block.mutable_columns();
_output_null_data<true>(cols, state->batch_size());
_finalize_current_phase<true,
JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN>(
cols, state->batch_size());
}
}
return Status::OK();
},
_join_op_variants, make_bool_variant(_match_all_build),
make_bool_variant(_match_all_probe)));
_join_op_variants, make_bool_variant(_match_all_build || _is_right_semi_anti),
make_bool_variant(_match_all_probe || _is_left_semi_anti)));
*eos = _match_all_build
? _output_null_idx_build_side == _build_blocks.size() && _matched_rows_done
: _matched_rows_done;
@ -265,8 +267,8 @@ void VNestedLoopJoinNode::_process_left_child_block(MutableColumns& dst_columns,
}
}
template <bool BuildSide>
void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t batch_size) {
template <bool BuildSide, bool IsSemi>
void VNestedLoopJoinNode::_finalize_current_phase(MutableColumns& dst_columns, size_t batch_size) {
if constexpr (BuildSide) {
auto build_block_sz = _build_blocks.size();
size_t i = _output_null_idx_build_side;
@ -281,13 +283,21 @@ void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t
std::vector<int> selector(num_rows);
size_t selector_idx = 0;
for (size_t j = 0; j < num_rows; j++) {
if (!cur_visited_flags[j]) {
selector[selector_idx++] = j;
if constexpr (IsSemi) {
if (cur_visited_flags[j]) {
selector[selector_idx++] = j;
}
} else {
if (!cur_visited_flags[j]) {
selector[selector_idx++] = j;
}
}
}
for (size_t j = 0; j < _num_probe_side_columns; ++j) {
DCHECK(_join_op == TJoinOp::RIGHT_OUTER_JOIN ||
_join_op == TJoinOp::FULL_OUTER_JOIN);
_join_op == TJoinOp::FULL_OUTER_JOIN ||
_join_op == TJoinOp::RIGHT_ANTI_JOIN ||
_join_op == TJoinOp::RIGHT_SEMI_JOIN);
dst_columns[j]->insert_many_defaults(selector_idx);
}
@ -319,8 +329,14 @@ void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t
}
_output_null_idx_build_side = i;
} else {
if (_cur_probe_row_visited_flags) {
return;
if constexpr (IsSemi) {
if (!_cur_probe_row_visited_flags) {
return;
}
} else {
if (_cur_probe_row_visited_flags) {
return;
}
}
DCHECK_LT(_left_block_pos, _left_block.rows());
@ -355,7 +371,7 @@ void VNestedLoopJoinNode::_reset_with_next_probe_row(MutableColumns& dst_columns
template <bool SetBuildSideFlag, bool SetProbeSideFlag>
Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(
Block* block, std::stack<uint16_t>& offset_stack) {
Block* block, std::stack<uint16_t>& offset_stack, bool materialize) {
auto column_to_keep = block->columns();
// If we need to set visited flags for build side,
// 1. Execute conjuncts and get a column with bool type to do filtering.
@ -406,7 +422,15 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(
if constexpr (SetProbeSideFlag) {
_cur_probe_row_visited_flags |= simd::contain_byte<uint8>(filter_data, size, 1);
}
Block::filter_block_internal(block, filter, column_to_keep);
#define CLEAR_BLOCK \
for (size_t i = 0; i < column_to_keep; ++i) { \
block->get_by_position(i).column->assume_mutable()->clear(); \
}
if (materialize) {
Block::filter_block_internal(block, filter, column_to_keep);
} else {
CLEAR_BLOCK
}
} else if (auto* const_column = check_and_get_column<ColumnConst>(*filter_column)) {
bool ret = const_column->get_bool(0);
if (!ret) {
@ -431,6 +455,9 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(
if constexpr (SetProbeSideFlag) {
_cur_probe_row_visited_flags |= ret;
}
if (!materialize) {
CLEAR_BLOCK
}
}
} else {
const IColumn::Filter& filter =
@ -457,10 +484,14 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(
_cur_probe_row_visited_flags |=
simd::contain_byte<uint8>(filter.data(), filter.size(), 1);
}
Block::filter_block_internal(block, filter, column_to_keep);
if (materialize) {
Block::filter_block_internal(block, filter, column_to_keep);
} else {
CLEAR_BLOCK
}
}
Block::erase_useless_column(block, column_to_keep);
}
#undef CLEAR_BLOCK
return Status::OK();
}

View File

@ -68,10 +68,11 @@ private:
const Block& now_process_build_block) const;
template <bool SetBuildSideFlag, bool SetProbeSideFlag>
Status _do_filtering_and_update_visited_flags(Block* block, std::stack<uint16_t>& offset_stack);
Status _do_filtering_and_update_visited_flags(Block* block, std::stack<uint16_t>& offset_stack,
bool materialize);
template <bool BuildSide>
void _output_null_data(MutableColumns& dst_columns, size_t batch_size);
template <bool BuildSide, bool IsSemi>
void _finalize_current_phase(MutableColumns& dst_columns, size_t batch_size);
void _reset_with_next_probe_row(MutableColumns& dst_columns);

View File

@ -724,25 +724,23 @@ public class StmtRewriter {
}
if (!hasEqJoinPred && !inlineView.isCorrelated()) {
// TODO: Remove this when independent subquery evaluation is implemented.
// TODO: Requires support for non-equi joins.
if (!expr.getSubquery().returnsScalarColumn()) {
throw new AnalysisException("Unsupported predicate with subquery: "
+ expr.toSql());
// Join with InPredicate is actually an equal join, so we choose HashJoin.
Preconditions.checkArgument(!(expr instanceof InPredicate));
if (expr instanceof ExistsPredicate) {
joinOp = ((ExistsPredicate) expr).isNotExists() ? JoinOperator.LEFT_ANTI_JOIN
: JoinOperator.LEFT_SEMI_JOIN;
} else {
joinOp = JoinOperator.CROSS_JOIN;
// We can equal the aggregate subquery using a cross join. All conjuncts
// that were extracted from the subquery are added to stmt's WHERE clause.
stmt.whereClause =
CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause);
}
// TODO: Requires support for null-aware anti-join mode in nested-loop joins
if (expr.getSubquery().isScalarSubquery() && expr instanceof InPredicate
&& ((InPredicate) expr).isNotIn()) {
throw new AnalysisException("Unsupported NOT IN predicate with subquery: "
+ expr.toSql());
inlineView.setJoinOp(joinOp);
if (joinOp != JoinOperator.CROSS_JOIN) {
inlineView.setOnClause(onClausePredicate);
}
// We can equal the aggregate subquery using a cross join. All conjuncts
// that were extracted from the subquery are added to stmt's WHERE clause.
stmt.whereClause =
CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause);
inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
// Indicate that the CROSS JOIN may add a new visible tuple to stmt's
// select list (if the latter contains an unqualified star item '*')
return true;

View File

@ -71,12 +71,12 @@ import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.nereids.util.Utils;
import org.apache.doris.planner.AggregationNode;
import org.apache.doris.planner.AssertNumRowsNode;
import org.apache.doris.planner.CrossJoinNode;
import org.apache.doris.planner.DataPartition;
import org.apache.doris.planner.EmptySetNode;
import org.apache.doris.planner.ExchangeNode;
import org.apache.doris.planner.HashJoinNode;
import org.apache.doris.planner.HashJoinNode.DistributionMode;
import org.apache.doris.planner.NestedLoopJoinNode;
import org.apache.doris.planner.OlapScanNode;
import org.apache.doris.planner.PlanFragment;
import org.apache.doris.planner.PlanNode;
@ -719,10 +719,13 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
.map(TupleDescriptor::getId)
.collect(Collectors.toList());
CrossJoinNode crossJoinNode = new CrossJoinNode(context.nextPlanNodeId(),
leftFragmentPlanRoot, rightFragmentPlanRoot, tupleIds, null, null, null);
JoinType joinType = nestedLoopJoin.getJoinType();
NestedLoopJoinNode nestedLoopJoinNode = new NestedLoopJoinNode(context.nextPlanNodeId(),
leftFragmentPlanRoot, rightFragmentPlanRoot, tupleIds, JoinType.toJoinOperator(joinType),
null, null, null);
if (nestedLoopJoin.getStats() != null) {
crossJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount());
nestedLoopJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount());
}
Map<ExprId, SlotReference> leftChildOutputMap = Maps.newHashMap();
@ -763,15 +766,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
.filter(Objects::nonNull)
.collect(Collectors.toList());
JoinType joinType = nestedLoopJoin.getJoinType();
if (crossJoinNode.getConjuncts().isEmpty()
if (nestedLoopJoinNode.getConjuncts().isEmpty()
&& (joinType == JoinType.LEFT_ANTI_JOIN || joinType == JoinType.LEFT_SEMI_JOIN)) {
for (SlotDescriptor leftSlotDescriptor : leftSlotDescriptors) {
SlotReference sf = leftChildOutputMap.get(context.findExprId(leftSlotDescriptor.getId()));
SlotDescriptor sd = context.createSlotDesc(intermediateDescriptor, sf);
leftIntermediateSlotDescriptor.add(sd);
}
} else if (crossJoinNode.getConjuncts().isEmpty()
} else if (nestedLoopJoinNode.getConjuncts().isEmpty()
&& (joinType == JoinType.RIGHT_ANTI_JOIN || joinType == JoinType.RIGHT_SEMI_JOIN)) {
for (SlotDescriptor rightSlotDescriptor : rightSlotDescriptors) {
SlotReference sf = rightChildOutputMap.get(context.findExprId(rightSlotDescriptor.getId()));
@ -799,14 +801,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
leftIntermediateSlotDescriptor.forEach(sd -> sd.setIsNullable(true));
}
crossJoinNode.setvIntermediateTupleDescList(Lists.newArrayList(intermediateDescriptor));
nestedLoopJoinNode.setvIntermediateTupleDescList(Lists.newArrayList(intermediateDescriptor));
rightFragment.getPlanRoot().setCompactData(false);
crossJoinNode.setChild(0, leftFragment.getPlanRoot());
connectChildFragment(crossJoinNode, 1, leftFragment, rightFragment, context);
leftFragment.setPlanRoot(crossJoinNode);
nestedLoopJoinNode.setChild(0, leftFragment.getPlanRoot());
connectChildFragment(nestedLoopJoinNode, 1, leftFragment, rightFragment, context);
leftFragment.setPlanRoot(nestedLoopJoinNode);
nestedLoopJoin.getOtherJoinConjuncts().stream()
.map(e -> ExpressionTranslator.translate(e, context)).forEach(crossJoinNode::addConjunct);
.map(e -> ExpressionTranslator.translate(e, context)).forEach(nestedLoopJoinNode::addConjunct);
if (nestedLoopJoin.isShouldTranslateOutput()) {
// translate output expr on intermediate tuple
@ -817,11 +819,11 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
TupleDescriptor outputDescriptor = context.generateTupleDesc();
outputSlotReferences.forEach(s -> context.createSlotDesc(outputDescriptor, s));
crossJoinNode.setvOutputTupleDesc(outputDescriptor);
crossJoinNode.setvSrcToOutputSMap(srcToOutput);
nestedLoopJoinNode.setvOutputTupleDesc(outputDescriptor);
nestedLoopJoinNode.setvSrcToOutputSMap(srcToOutput);
}
if (nestedLoopJoin.getStats() != null) {
crossJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount());
nestedLoopJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount());
}
return leftFragment;
} else {
@ -865,10 +867,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, Pla
hashJoinNode.setvSrcToOutputSMap(execExprList);
return inputFragment;
}
if (inputPlanNode instanceof CrossJoinNode) {
CrossJoinNode crossJoinNode = (CrossJoinNode) inputPlanNode;
crossJoinNode.setvOutputTupleDesc(tupleDescriptor);
crossJoinNode.setvSrcToOutputSMap(execExprList);
if (inputPlanNode instanceof NestedLoopJoinNode) {
NestedLoopJoinNode nestedLoopJoinNode = (NestedLoopJoinNode) inputPlanNode;
nestedLoopJoinNode.setvOutputTupleDesc(tupleDescriptor);
nestedLoopJoinNode.setvSrcToOutputSMap(execExprList);
return inputFragment;
}
inputPlanNode.setProjectList(execExprList);

View File

@ -205,8 +205,8 @@ public class DistributedPlanner {
Preconditions.checkState(childFragments.size() == 2);
result = createHashJoinFragment((HashJoinNode) root,
childFragments.get(1), childFragments.get(0), fragments);
} else if (root instanceof CrossJoinNode) {
result = createCrossJoinFragment((CrossJoinNode) root, childFragments.get(1),
} else if (root instanceof NestedLoopJoinNode) {
result = createNestedLoopJoinFragment((NestedLoopJoinNode) root, childFragments.get(1),
childFragments.get(0));
} else if (root instanceof SelectNode) {
result = createSelectNodeFragment((SelectNode) root, childFragments);
@ -697,16 +697,42 @@ public class DistributedPlanner {
* Modifies the leftChildFragment to execute a cross join. The right child input is provided by an ExchangeNode,
* which is the destination of the rightChildFragment's output.
*/
private PlanFragment createCrossJoinFragment(
CrossJoinNode node, PlanFragment rightChildFragment, PlanFragment leftChildFragment)
private PlanFragment createNestedLoopJoinFragment(
NestedLoopJoinNode node, PlanFragment rightChildFragment, PlanFragment leftChildFragment)
throws UserException {
// The rhs tree is going to send data through an exchange node which effectively
// compacts the data. No reason to do it again at the rhs root node.
rightChildFragment.getPlanRoot().setCompactData(false);
node.setChild(0, leftChildFragment.getPlanRoot());
connectChildFragment(node, 1, leftChildFragment, rightChildFragment);
leftChildFragment.setPlanRoot(node);
return leftChildFragment;
if (node.canParallelize()) {
// The rhs tree is going to send data through an exchange node which effectively
// compacts the data. No reason to do it again at the rhs root node.
rightChildFragment.getPlanRoot().setCompactData(false);
node.setChild(0, leftChildFragment.getPlanRoot());
connectChildFragment(node, 1, leftChildFragment, rightChildFragment);
leftChildFragment.setPlanRoot(node);
return leftChildFragment;
} else {
// For non-equal nljoin, we should make sure using only one instance to do processing.
DataPartition lhsJoinPartition = new DataPartition(TPartitionType.UNPARTITIONED);
ExchangeNode lhsExchange =
new ExchangeNode(ctx.getNextNodeId(), leftChildFragment.getPlanRoot(), false);
lhsExchange.setNumInstances(1);
lhsExchange.init(ctx.getRootAnalyzer());
DataPartition rhsJoinPartition =
new DataPartition(TPartitionType.UNPARTITIONED);
ExchangeNode rhsExchange =
new ExchangeNode(ctx.getNextNodeId(), rightChildFragment.getPlanRoot(), false);
rhsExchange.setNumInstances(1);
rhsExchange.init(ctx.getRootAnalyzer());
node.setChild(0, lhsExchange);
node.setChild(1, rhsExchange);
PlanFragment joinFragment = new PlanFragment(ctx.getNextFragmentId(), node, lhsJoinPartition);
// connect the child fragments
leftChildFragment.setDestination(lhsExchange);
leftChildFragment.setOutputPartition(lhsJoinPartition);
rightChildFragment.setDestination(rhsExchange);
rightChildFragment.setOutputPartition(rhsJoinPartition);
return joinFragment;
}
}
/**

View File

@ -49,17 +49,23 @@ import java.util.Set;
import java.util.stream.Collectors;
/**
* Cross join between left child and right child.
* Nested loop join between left child and right child.
*/
public class CrossJoinNode extends JoinNodeBase {
private static final Logger LOG = LogManager.getLogger(CrossJoinNode.class);
public class NestedLoopJoinNode extends JoinNodeBase {
private static final Logger LOG = LogManager.getLogger(NestedLoopJoinNode.class);
public CrossJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef) {
super(id, "CROSS JOIN", StatisticalType.CROSS_JOIN_NODE, outer, inner, innerRef);
public NestedLoopJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, TableRef innerRef) {
super(id, "NESTED LOOP JOIN", StatisticalType.NESTED_LOOP_JOIN_NODE, outer, inner, innerRef);
tupleIds.addAll(outer.getTupleIds());
tupleIds.addAll(inner.getTupleIds());
}
public boolean canParallelize() {
return joinOp == JoinOperator.CROSS_JOIN || joinOp == JoinOperator.INNER_JOIN
|| joinOp == JoinOperator.LEFT_OUTER_JOIN || joinOp == JoinOperator.LEFT_ANTI_JOIN
|| joinOp == JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN;
}
@Override
public Set<SlotId> computeInputSlotIds(Analyzer analyzer) throws NotImplementedException {
Set<SlotId> result = Sets.newHashSet();
@ -89,28 +95,16 @@ public class CrossJoinNode extends JoinNodeBase {
protected Pair<Boolean, Boolean> needToCopyRightAndLeft() {
boolean copyleft = true;
boolean copyRight = true;
switch (joinOp) {
case LEFT_ANTI_JOIN:
case LEFT_SEMI_JOIN:
case NULL_AWARE_LEFT_ANTI_JOIN:
copyRight = false;
break;
case RIGHT_SEMI_JOIN:
case RIGHT_ANTI_JOIN:
copyleft = false;
break;
default:
break;
}
return Pair.of(copyleft, copyRight);
}
/**
* Only for Nereids.
*/
public CrossJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, List<TupleId> tupleIds,
List<Expr> srcToOutputList, TupleDescriptor intermediateTuple, TupleDescriptor outputTuple) {
super(id, "CROSS JOIN", StatisticalType.CROSS_JOIN_NODE, JoinOperator.CROSS_JOIN);
public NestedLoopJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, List<TupleId> tupleIds,
JoinOperator joinOperator, List<Expr> srcToOutputList, TupleDescriptor intermediateTuple,
TupleDescriptor outputTuple) {
super(id, "NESTED LOOP JOIN", StatisticalType.NESTED_LOOP_JOIN_NODE, joinOperator);
this.tupleIds.addAll(tupleIds);
children.add(outer);
children.add(inner);
@ -147,7 +141,7 @@ public class CrossJoinNode extends JoinNodeBase {
cardinality = Math.round(((double) cardinality) * computeOldSelectivity());
}
}
LOG.debug("stats CrossJoin: cardinality={}", Long.toString(cardinality));
LOG.debug("stats NestedLoopJoin: cardinality={}", Long.toString(cardinality));
}
@Override
@ -204,7 +198,7 @@ public class CrossJoinNode extends JoinNodeBase {
}
if (!conjuncts.isEmpty()) {
output.append(detailPrefix).append("other predicates: ").append(getExplainString(conjuncts)).append("\n");
output.append(detailPrefix).append("predicates: ").append(getExplainString(conjuncts)).append("\n");
}
output.append(detailPrefix).append(String.format("cardinality=%s", cardinality)).append("\n");
// todo unify in plan node

View File

@ -1050,7 +1050,7 @@ public class SingleNodePlanner {
&& candidateCardinalityIsSmaller(
candidate, tblRefToPlanNodeOfCandidate.second.getCardinality(),
newRoot, newRootRightChildCardinality)))
|| (candidate instanceof HashJoinNode && newRoot instanceof CrossJoinNode)) {
|| (candidate instanceof HashJoinNode && newRoot instanceof NestedLoopJoinNode)) {
newRoot = candidate;
minEntry = tblRefToPlanNodeOfCandidate;
newRootRightChildCardinality = cardinalityOfCandidate;
@ -2055,20 +2055,13 @@ public class SingleNodePlanner {
}
analyzer.markConjunctsAssigned(ojConjuncts);
if (eqJoinConjuncts.isEmpty()) {
// only inner join can change to cross join
if (innerRef.getJoinOp().isSemiAntiJoin()) {
throw new AnalysisException("non-equal " + innerRef.getJoinOp().toString()
+ " is not supported");
}
// construct cross join node
// LOG.debug("Join between {} and {} requires at least one conjunctive"
// + " equality predicate between the two tables",
// outerRef.getAliasAsName(), innerRef.getAliasAsName());
// TODO If there are eq join predicates then we should construct a hash join
CrossJoinNode result =
new CrossJoinNode(ctx.getNextNodeId(), outer, inner, innerRef);
NestedLoopJoinNode result =
new NestedLoopJoinNode(ctx.getNextNodeId(), outer, inner, innerRef);
result.addConjuncts(ojConjuncts);
result.init(analyzer);
return result;

View File

@ -27,8 +27,8 @@ public class DeriveFactory {
return new AnalyticEvalStatsDerive();
case ASSERT_NUM_ROWS_NODE:
return new AssertNumRowsStatsDerive();
case CROSS_JOIN_NODE:
return new CrossJoinStatsDerive();
case NESTED_LOOP_JOIN_NODE:
return new NestedLoopJoinStatsDerive();
case EMPTY_SET_NODE:
case REPEAT_NODE:
return new EmptySetStatsDerive();

View File

@ -24,10 +24,10 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
* Derive CrossJoinNode statistics.
* Derive NestedLoopJoinNode statistics.
*/
public class CrossJoinStatsDerive extends BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(CrossJoinStatsDerive.class);
public class NestedLoopJoinStatsDerive extends BaseStatsDerive {
private static final Logger LOG = LogManager.getLogger(NestedLoopJoinStatsDerive.class);
@Override
protected long deriveRowCount() {

View File

@ -23,7 +23,7 @@ public enum StatisticalType {
ANALYTIC_EVAL_NODE,
ASSERT_NUM_ROWS_NODE,
BROKER_SCAN_NODE,
CROSS_JOIN_NODE,
NESTED_LOOP_JOIN_NODE,
EMPTY_SET_NODE,
ES_SCAN_NODE,
EXCEPT_NODE,

View File

@ -1024,7 +1024,7 @@ public class QueryPlanTest extends TestWithFeService {
+ " SELECT MAX(k9)\n" + " FROM test.pushdown_test);";
String explainString = getSQLPlanOrErrorMsg("explain " + sql);
Assert.assertTrue(explainString.contains("PLAN FRAGMENT"));
Assert.assertTrue(explainString.contains("CROSS JOIN"));
Assert.assertTrue(explainString.contains("NESTED LOOP JOIN"));
Assert.assertTrue(!explainString.contains("PREDICATES"));
}

View File

@ -161,7 +161,7 @@ public class StatisticDeriveTest extends TestWithFeService {
Assert.assertNotNull(stmtExecutor.planner().getFragments());
Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size());
System.out.println(getSQLPlanOrErrorMsg("explain " + sql));
assertSQLPlanOrErrorMsgContains(sql, "CROSS JOIN");
assertSQLPlanOrErrorMsgContains(sql, "NESTED LOOP JOIN");
assertSQLPlanOrErrorMsgContains(sql, "ASSERT NUMBER OF ROWS");
assertSQLPlanOrErrorMsgContains(sql, "EXCHANGE");
assertSQLPlanOrErrorMsgContains(sql, "AGGREGATE");

View File

@ -72,7 +72,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
public void testWithoutRewritten() throws Exception {
String query = "select * from tb1, tb2 where (tb1.k1 =1) or (tb2.k2=1)";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -89,7 +89,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("(`tb1`.`k1` = 1 OR `tb1`.`k1` = 2)"));
Assert.assertTrue(planString.contains("(`tb2`.`k1` = 1 OR `tb2`.`k1` = 2)"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -97,7 +97,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String query = "select * from tb1, tb2 where (tb1.k1>1 and tb2.k1=1) or (tb1.k1 <2 and tb2.k2=2)";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertFalse(planString.contains("(`tb1`.`k1` > 1 OR `tb1`.`k1` < 2)"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -105,7 +105,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String query = "select * from tb1, tb2 where (tb1.k1 between 1 and 3 and tb2.k1=1) or (tb1.k1 <2 and tb2.k2=2)";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb1`.`k1` <= 3"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -113,7 +113,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String query = "select * from tb1, tb2 where (tb1.k1 >1 and tb1.k1 <3 and tb1.k1 <5 and tb2.k1=1) "
+ "or (tb1.k1 <2 and tb2.k2=2)";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -122,7 +122,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
+ "or (tb1.k1 <2 and tb2.k2=2)";
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb1`.`k1` < 5"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -132,7 +132,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb1`.`k1` IN (1, 2)"));
Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2)"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -142,7 +142,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
String planString = dorisAssert.query(query).explainQuery();
Assert.assertTrue(planString.contains("`tb1`.`k1` IN (1, 2, 3)"));
Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2)"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test
@ -153,7 +153,7 @@ public class ExtractCommonFactorsRuleFunctionTest {
Assert.assertTrue(planString.contains("`tb1`.`k1` >= 1"));
Assert.assertTrue(planString.contains("`tb1`.`k1` <= 4"));
Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2, 3)"));
Assert.assertTrue(planString.contains("CROSS JOIN"));
Assert.assertTrue(planString.contains("NESTED LOOP JOIN"));
}
@Test

View File

@ -0,0 +1,41 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !join --
-- !join --
-- !join --
-- !join --
-- !join --
-- !join --
1 1
2 2
3 3
10 10
-- !join --
-- !join --
1 1
2 2
3 3
10 10
-- !join --
1 1
2 2
3 3
10 10
-- !join --
-- !join --
-- !join --
1 1
2 2
3 3
10 10

View File

@ -529,23 +529,11 @@ suite("test_join", "query,p0") {
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1
where b.k3 is not null order by 1, 2, 3, 4, 5 limit 65535"""
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > b.k1
where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > 0
where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res15 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > b.k1
where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > 0
where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535"""
def res15 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535"""
def res16 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b
on a.k1 = b.k1 and a.k2 > 0 where b.k3 is not null
@ -565,52 +553,22 @@ suite("test_join", "query,p0") {
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000"
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000"
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res19 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 = b.k1
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res19 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 = b.k1
left semi join ${tbName3} c on a.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535"""
def res20 = sql"""select ${s} from (select distinct a.* from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1
left outer join ${tbName3} c on a.k2 = c.k2 where a.k1 is not null
@ -638,25 +596,13 @@ suite("test_join", "query,p0") {
def res24 = sql"""select ${s} from ${tbName1} a right outer join ${tbName1} b
on a.k1 = b.k1 where a.k2 is not null order by 1, 2, 3, 4, 5 limit 65535"""
check2_doris(res23, res24)
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res25 = sql"""select ${s} from ${tbName2} a right semi join ${tbName1} b
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res25 = sql"""select ${s} from ${tbName2} a right semi join ${tbName1} b
on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535"""
def res26 = sql"""select ${s} from ${tbName2} a right outer join ${tbName1} b on a.k1 = b.k1 and
a.k2 > 0 where a.k2 is not null order by 1, 2, 3, 4, 5 limit 65535"""
@ -675,52 +621,22 @@ suite("test_join", "query,p0") {
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res29 = sql"""select ${s} from ${tbName3} a right semi join ${tbName1} c on a.k1 = c.k1
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res29 = sql"""select ${s} from ${tbName3} a right semi join ${tbName1} c on a.k1 = c.k1
right semi join ${tbName2} b on b.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535"""
def res30 = sql"""select ${s} from (select distinct b.* from ${tbName3} a right outer join ${tbName1} c on a.k1 = c.k1
@ -744,25 +660,13 @@ suite("test_join", "query,p0") {
def res34 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b
on a.k1 = b.k1 where b.k3 is null order by 1, 2, 3, 4, 5 limit 65535"""
check2_doris(res33, res34)
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 > b.k1 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res35 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 > b.k1 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res35 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 50000"""
def res36 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b
on a.k1 = b.k1 and a.k2 > 0 where b.k3 is null
@ -775,59 +679,29 @@ suite("test_join", "query,p0") {
order by 1, 2, 3, 4, 5 limit 65535"""
check2_doris(res37, res38)
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res39 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b on a.k1 = b.k1
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res39 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b on a.k1 = b.k1
left anti join ${tbName3} c on a.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535"""
def res40 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1
left outer join ${tbName3} c on a.k2 = c.k2 where
@ -853,25 +727,13 @@ suite("test_join", "query,p0") {
on a.k1 = b.k1 where a.k2 is null
order by 1, 2, 3, 4, 5 limit 65535"""
check2_doris(res43, res44)
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
def res45 = sql"""select ${s} from ${tbName2} a right anti join ${tbName1} b
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
def res45 = sql"""select ${s} from ${tbName2} a right anti join ${tbName1} b
on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535"""
def res46 = sql"""select ${s} from ${tbName2} a right outer join ${tbName1} b
on a.k1 = b.k1 and a.k2 > 0 where a.k2 is null
@ -891,52 +753,22 @@ suite("test_join", "query,p0") {
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
test {
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
check{result, exception, startTime, endTime ->
assertTrue(exception != null)
logger.info(exception.message)
}
}
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} c on a.k1 = c.k1
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b
on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000"
order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from ${tbName1} a right anti join ${tbName2} c on a.k1 = c.k1
right anti join ${tbName3} b on c.k2 = b.k2 order by 1, 2, 3, 4, 5 limit 65535"""
sql"""select ${s} from (select distinct b.k1, b.k2, b.k3, b.k4, b.k5 from

View File

@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_nestedloop_semi_anti_join", "query_p0") {
def tbl1 = "test_nestedloop_semi_anti_join1"
def tbl2 = "test_nestedloop_semi_anti_join2"
sql "DROP TABLE IF EXISTS ${tbl1}"
sql """
CREATE TABLE IF NOT EXISTS ${tbl1} (
`user_id` LARGEINT NOT NULL COMMENT "",
`user_id2` LARGEINT NOT NULL COMMENT ""
)
DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
"""
sql "DROP TABLE IF EXISTS ${tbl2}"
sql """
CREATE TABLE IF NOT EXISTS ${tbl2} (
`user_id` LARGEINT NOT NULL COMMENT "",
`user_id2` LARGEINT NOT NULL COMMENT ""
)
DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
"""
qt_join """
select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
sql """ INSERT INTO ${tbl1} VALUES (1, 1), (2, 2), (3, 3), (10, 10); """
qt_join """
select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
sql """ INSERT INTO ${tbl2} VALUES (2, 2), (3, 3), (4, 4), (0, 0); """
qt_join """
select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
qt_join """
select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id;
"""
sql "DROP TABLE IF EXISTS ${tbl1}"
sql "DROP TABLE IF EXISTS ${tbl2}"
}

View File

@ -57,9 +57,9 @@ suite("test_explain_tpch_sf_1_q11") {
explainStr ->
explainStr.contains("VTOP-N\n" +
" | order by: <slot 22> `\$a\$1`.`\$c\$2` DESC") &&
explainStr.contains("CROSS JOIN\n" +
explainStr.contains("VNESTED LOOP JOIN\n" +
" | join op: CROSS JOIN()\n" +
" | other predicates: <slot 76> > <slot 77> * 0.0001") &&
" | predicates: <slot 76> > <slot 77> * 0.0001") &&
explainStr.contains("VAGGREGATE (merge finalize)\n" +
" | output: sum(<slot 9> sum(<slot 31> * <slot 32>))\n" +
" | group by: <slot 8> `ps_partkey`") &&

View File

@ -74,9 +74,9 @@ suite("test_explain_tpch_sf_1_q22") {
explainStr.contains("vec output tuple id: 11") &&
explainStr.contains("output slot ids: 40 41 \n" +
" | hash output slot ids: 36 37 ") &&
explainStr.contains("VCROSS JOIN\n" +
explainStr.contains("VNESTED LOOP JOIN\n" +
" | join op: CROSS JOIN()\n" +
" | other predicates: <slot 45> > <slot 46>") &&
" | predicates: <slot 45> > <slot 46>") &&
explainStr.contains("TABLE: default_cluster:regression_test_tpch_sf1_p1_tpch_sf1.customer(customer), PREAGGREGATION: ON\n" +
" PREDICATES: substr(`c_phone`, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')") &&
explainStr.contains("TABLE: default_cluster:regression_test_tpch_sf1_p1_tpch_sf1.orders(orders), PREAGGREGATION: ON") &&