From 9468711f9fa67228e48202cbceebeeb886ecc1d3 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Fri, 13 Jan 2023 10:18:05 +0800 Subject: [PATCH] [Bug](join) fix bug null aware left anti join not correct result (#15841) --- be/src/common/config.h | 2 +- be/src/olap/push_handler.cpp | 2 +- be/src/vec/exec/join/vhash_join_node.cpp | 5 ++--- regression-test/data/query_p0/join/test_join.out | 4 ++++ regression-test/suites/query_p0/join/test_join.groovy | 6 +++--- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 021410f26b..32a6b4f0f6 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -487,7 +487,7 @@ CONF_mInt64(write_buffer_size, "209715200"); // max buffer size used in memtable for the aggregated table, default 400MB CONF_mInt64(write_buffer_size_for_agg, "419430400"); // write buffer size in push task for sparkload, default 1GB -CONF_mInt64(write_buffer_size_for_sparkload, "1073741824"); +CONF_mInt64(flush_size_for_sparkload, "1073741824"); // following 2 configs limit the memory consumption of load process on a Backend. // eg: memory limit to 80% of mem limit config but up to 100GB(default) diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 155b322c85..55472c6d82 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -250,7 +250,7 @@ Status PushHandler::_convert_v2(TabletSharedPtr cur_tablet, RowsetSharedPtr* cur VLOG_NOTICE << "start to convert etl file to delta."; while (!reader->eof()) { if (reader->mem_pool()->mem_tracker()->consumption() > - config::write_buffer_size_for_sparkload) { + config::flush_size_for_sparkload) { RETURN_NOT_OK(rowset_writer->flush()); reader->mem_pool()->free_all(); } diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 31f90807fd..c6e6ecea45 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -861,10 +861,9 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc _process_hashtable_ctx_variants_init(state); } - // Since the comparison of null values is meaningless, left anti join should not output null + // Since the comparison of null values is meaningless, null aware left anti join should not output null // when the build side is not empty. - if (eos && !_build_blocks->empty() && - (_join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN)) { + if (eos && !_build_blocks->empty() && _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { _probe_ignore_null = true; } return Status::OK(); diff --git a/regression-test/data/query_p0/join/test_join.out b/regression-test/data/query_p0/join/test_join.out index f9d5b36279..e4d2ebcbb1 100644 --- a/regression-test/data/query_p0/join/test_join.out +++ b/regression-test/data/query_p0/join/test_join.out @@ -1217,6 +1217,7 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 3 -- !left_anti_join_with_other_pred -- +\N 1 2 3 @@ -1234,6 +1235,7 @@ false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 15 -- !left_anti_join_null_1 -- +\N 4 5 6 @@ -1780,6 +1782,8 @@ false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0. false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 3 1989 1002 11011905 24453.325 false 2012-03-14 2000-01-01T00:00 yunlj8@nk 78945.0 3654.0 string12345 0 +-- !join_bug4 -- + -- !join_basic1 -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 false 2 1986 1001 11011903 1243.500 false 1901-12-31 1989-03-21T13:00 wangynnsf 20.268 789.25 string12345 -170141183460469231731687303715884105727 diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index 7d5901e5f6..06d923bc7f 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -933,7 +933,7 @@ suite("test_join", "query,p0") { // https://github.com/apache/doris/issues/4210 qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2) order by k1""" - + qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 from baseall where k1 is not null )""" // basic join @@ -975,7 +975,7 @@ suite("test_join", "query,p0") { def res71 = sql"""select * from ${tbName2} a left anti join ${tbName1} b on (a.${c} = b.${c}) order by a.k1, a.k2, a.k3""" def res72 = sql"""select distinct a.* from ${tbName2} a left outer join ${tbName1} b on (a.${c} = b.${c}) - where b.k1 is null and a.k1 is not null order by a.k1, a.k2, a.k3""" + where b.k1 is null order by a.k1, a.k2, a.k3""" check2_doris(res71, res72) def res73 = sql"""select * from ${tbName2} a right anti join ${tbName1} b on (a.${c} = b.${c}) @@ -1083,7 +1083,7 @@ suite("test_join", "query,p0") { def res85 = sql"""select a.k1, a.k2 from ${tbName2} a left anti join ${null_name} b on a.k1 = b.n2 order by 1, 2""" - def res86 = sql"""select k1, k2 from ${tbName2} where k1 is not null order by k1, k2""" + def res86 = sql"""select k1, k2 from ${tbName2} order by k1, k2""" check2_doris(res85, res86) def res87 = sql"""select b.n1, b.n2 from ${tbName2} a right anti join ${null_name} b on a.k1 = b.n2