diff --git a/src/sql/engine/px/ob_granule_pump.cpp b/src/sql/engine/px/ob_granule_pump.cpp index c3c9dc68c7..dd1336ece9 100644 --- a/src/sql/engine/px/ob_granule_pump.cpp +++ b/src/sql/engine/px/ob_granule_pump.cpp @@ -1096,14 +1096,14 @@ int ObAffinitizeGranuleSplitter::split_tasks_affinity(ObExecContext &ctx, } else if (OB_FAIL(affinitize_rule.add_partition(tablet_loc.tablet_id_.id(), tablet_idx, parallelism, - my_session->get_effective_tenant_id(), partition_row_info))) { LOG_WARN("Failed to get affinitize taskid" , K(ret)); } } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty()))) { + } else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty(), + my_session->get_effective_tenant_id()))) { LOG_WARN("failed to do random", K(ret)); } else { const ObIArray &partition_worker_pairs = affinitize_rule.get_result(); diff --git a/src/sql/engine/px/ob_px_util.cpp b/src/sql/engine/px/ob_px_util.cpp index 5ae0722bce..8257dcfd0c 100644 --- a/src/sql/engine/px/ob_px_util.cpp +++ b/src/sql/engine/px/ob_px_util.cpp @@ -2607,7 +2607,6 @@ int ObPxChannelUtil::sqcs_channles_asyn_wait(ObIArray &sqcs) int ObPxAffinityByRandom::add_partition(int64_t tablet_id, int64_t tablet_idx, int64_t worker_cnt, - uint64_t tenant_id, ObPxTabletInfo &partition_row_info) { int ret = OB_SUCCESS; @@ -2616,8 +2615,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id, LOG_WARN("The worker cnt is invalid", K(ret), K(worker_cnt)); } else { TabletHashValue part_hash_value; - uint64_t value = (tenant_id << 32 | tablet_idx); - part_hash_value.hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt); + part_hash_value.hash_value_ = 0; part_hash_value.tablet_idx_ = tablet_idx; part_hash_value.tablet_id_ = tablet_id; part_hash_value.partition_info_ = partition_row_info; @@ -2629,7 +2627,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id, return ret; } -int ObPxAffinityByRandom::do_random(bool use_partition_info) +int 
ObPxAffinityByRandom::do_random(bool use_partition_info, uint64_t tenant_id) { int ret = OB_SUCCESS; common::ObArray workers_load; @@ -2650,12 +2648,31 @@ int ObPxAffinityByRandom::do_random(bool use_partition_info) && (tablet_hash_values_.at(0).tablet_idx_ > tablet_hash_values_.at(1).tablet_idx_)) { asc_order = false; } + // In the partition-wise affinity scenario, the partition_idx of a pair of matching partitions may be different. + // For example, T1 consists of p0, p1, p2 and T2 consists of p1, p2, + // where T1.p1 <===> T2.p1 and T1.p2 <===> T2.p2. + // The partition_idx of T1.p1 is 1 and the partition_idx of T2.p1 is 0. + // If we calculated the hash value from partition_idx and sorted partitions by that hash value, + // T1.p1 and T2.p1 might be assigned to different workers. + // So we sort partitions by partition_idx (descending) and generate a relative_idx which starts from zero. + // Then we calculate the hash value from the relative_idx. + auto part_idx_compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.tablet_idx_ > b.tablet_idx_; }; + std::sort(tablet_hash_values_.begin(), + tablet_hash_values_.end(), + part_idx_compare_fun); + int64_t relative_idx = 0; + for (int64_t i = 0; i < tablet_hash_values_.count(); i++) { + uint64_t value = ((tenant_id << 32) | relative_idx); + tablet_hash_values_.at(i).hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt_); + relative_idx++; + } // 先打乱所有的序 auto compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.hash_value_ > b.hash_value_; }; std::sort(tablet_hash_values_.begin(), tablet_hash_values_.end(), compare_fun); + LOG_TRACE("after sort partition_hash_values randomly", K(tablet_hash_values_)); // 如果没有partition的统计信息则将它们round放置 if (!use_partition_info) { @@ -3406,13 +3423,13 @@ int ObSlaveMapUtil::build_ppwj_ch_mn_map(ObExecContext &ctx, ObDfo &parent, ObDf } else if (OB_FAIL(affinitize_rule.add_partition(location.tablet_id_.id(), tablet_idx, sqc.get_task_count(), - ctx.get_my_session()->get_effective_tenant_id(), 
partition_row_info))) { LOG_WARN("fail calc task_id", K(location.tablet_id_), K(sqc), K(ret)); } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty()))) { + } else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty(), + ctx.get_my_session()->get_effective_tenant_id()))) { LOG_WARN("failed to do random", K(ret)); } else { const ObIArray &partition_worker_pairs = diff --git a/src/sql/engine/px/ob_px_util.h b/src/sql/engine/px/ob_px_util.h index 3a09833809..1a2be62d69 100644 --- a/src/sql/engine/px/ob_px_util.h +++ b/src/sql/engine/px/ob_px_util.h @@ -443,9 +443,8 @@ public: int add_partition(int64_t tablet_id, int64_t tablet_idx, int64_t worker_cnt, - uint64_t tenant_id, ObPxTabletInfo &partition_row_info); - int do_random(bool use_partition_info); + int do_random(bool use_partition_info, uint64_t tenant_id); const ObIArray &get_result() { return tablet_hash_values_; } static int get_tablet_info(int64_t tablet_id, ObIArray &partitions_info, ObPxTabletInfo &partition_info); private: diff --git a/unittest/sql/engine/px/test_random_affi.cpp b/unittest/sql/engine/px/test_random_affi.cpp index 67de4b8513..84793dc6a9 100644 --- a/unittest/sql/engine/px/test_random_affi.cpp +++ b/unittest/sql/engine/px/test_random_affi.cpp @@ -52,18 +52,18 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { ObPxAffinityByRandom affinitize_rule; for (int i = 0; i < 5; ++i) { px_part_info.physical_row_count_ = (10 - i) * 100; - affinitize_rule.add_partition(i,i,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(i,i,parallel,px_part_info); } - affinitize_rule.do_random(true); + affinitize_rule.do_random(true, tenant_id); const common::ObIArray& result = affinitize_rule.get_result(); for (int i = 0; i < result.count(); ++i) { LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_)); } ASSERT_EQ(1, result.at(0).worker_id_); - 
ASSERT_EQ(2, result.at(1).worker_id_); + ASSERT_EQ(0, result.at(1).worker_id_); ASSERT_EQ(2, result.at(2).worker_id_); - ASSERT_EQ(0, result.at(3).worker_id_); - ASSERT_EQ(0, result.at(4).worker_id_); + ASSERT_EQ(2, result.at(3).worker_id_); + ASSERT_EQ(1, result.at(4).worker_id_); } { @@ -73,27 +73,27 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { ObPxAffinityByRandom affinitize_rule; px_part_info.physical_row_count_ = 3000; - affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(0,0,parallel,px_part_info); px_part_info.physical_row_count_ = 1000; - affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(1,1,parallel,px_part_info); px_part_info.physical_row_count_ = 2500; - affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(2,2,parallel,px_part_info); px_part_info.physical_row_count_ = 3500; - affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(3,3,parallel,px_part_info); px_part_info.physical_row_count_ = 2000; - affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(4,4,parallel,px_part_info); - affinitize_rule.do_random(true); + affinitize_rule.do_random(true, tenant_id); const common::ObIArray& result = affinitize_rule.get_result(); for (int i = 0; i < 5; ++i) { LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_)); } - ASSERT_EQ(3, result.at(0).worker_id_); - ASSERT_EQ(0, result.at(1).worker_id_); + ASSERT_EQ(4, result.at(0).worker_id_); + ASSERT_EQ(2, result.at(1).worker_id_); ASSERT_EQ(1, result.at(2).worker_id_); - ASSERT_EQ(2, result.at(3).worker_id_); - ASSERT_EQ(4, result.at(4).worker_id_); + ASSERT_EQ(0, result.at(3).worker_id_); + ASSERT_EQ(3, result.at(4).worker_id_); } { @@ -103,17 +103,17 @@ TEST_F(ObRandomAffiTaskSplitTest, 
split_task_test) { ObPxAffinityByRandom affinitize_rule; px_part_info.physical_row_count_ = 3000; - affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(0,0,parallel,px_part_info); px_part_info.physical_row_count_ = 4000; - affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(1,1,parallel,px_part_info); px_part_info.physical_row_count_ = 2500; - affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(2,2,parallel,px_part_info); px_part_info.physical_row_count_ = 1500; - affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(3,3,parallel,px_part_info); px_part_info.physical_row_count_ = 2000; - affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info); + affinitize_rule.add_partition(4,4,parallel,px_part_info); - affinitize_rule.do_random(true); + affinitize_rule.do_random(true, tenant_id); const common::ObIArray& result = affinitize_rule.get_result(); for (int i = 0; i < 5; ++i) { @@ -121,10 +121,10 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { } ASSERT_EQ(1, result.at(0).worker_id_); - ASSERT_EQ(2, result.at(1).worker_id_); + ASSERT_EQ(0, result.at(1).worker_id_); ASSERT_EQ(2, result.at(2).worker_id_); - ASSERT_EQ(0, result.at(3).worker_id_); - ASSERT_EQ(0, result.at(4).worker_id_); + ASSERT_EQ(2, result.at(3).worker_id_); + ASSERT_EQ(1, result.at(4).worker_id_); } }