[CP] fix correctness problem caused by GI splitting tasks randomly
This commit is contained in:
		| @ -1096,14 +1096,14 @@ int ObAffinitizeGranuleSplitter::split_tasks_affinity(ObExecContext &ctx, | ||||
|       } else if (OB_FAIL(affinitize_rule.add_partition(tablet_loc.tablet_id_.id(), | ||||
|                                                       tablet_idx, | ||||
|                                                       parallelism, | ||||
|                                                       my_session->get_effective_tenant_id(), | ||||
|                                                       partition_row_info))) { | ||||
|         LOG_WARN("Failed to get affinitize taskid" , K(ret)); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   if (OB_FAIL(ret)) { | ||||
|   } else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty()))) { | ||||
|   } else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty(), | ||||
|                                                my_session->get_effective_tenant_id()))) { | ||||
|     LOG_WARN("failed to do random", K(ret)); | ||||
|   } else { | ||||
|     const ObIArray<ObPxAffinityByRandom::TabletHashValue> &partition_worker_pairs = affinitize_rule.get_result(); | ||||
|  | ||||
| @ -2607,7 +2607,6 @@ int ObPxChannelUtil::sqcs_channles_asyn_wait(ObIArray<ObPxSqcMeta *> &sqcs) | ||||
| int ObPxAffinityByRandom::add_partition(int64_t tablet_id, | ||||
|                                         int64_t tablet_idx, | ||||
|                                         int64_t worker_cnt, | ||||
|                                         uint64_t tenant_id, | ||||
|                                         ObPxTabletInfo &partition_row_info) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
| @ -2616,8 +2615,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id, | ||||
|     LOG_WARN("The worker cnt is invalid", K(ret), K(worker_cnt)); | ||||
|   } else { | ||||
|     TabletHashValue part_hash_value; | ||||
|     uint64_t value = (tenant_id << 32 | tablet_idx); | ||||
|     part_hash_value.hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt); | ||||
|     part_hash_value.hash_value_ = 0; | ||||
|     part_hash_value.tablet_idx_ = tablet_idx; | ||||
|     part_hash_value.tablet_id_ = tablet_id; | ||||
|     part_hash_value.partition_info_ = partition_row_info; | ||||
| @ -2629,7 +2627,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id, | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| int ObPxAffinityByRandom::do_random(bool use_partition_info) | ||||
| int ObPxAffinityByRandom::do_random(bool use_partition_info, uint64_t tenant_id) | ||||
| { | ||||
|   int ret = OB_SUCCESS; | ||||
|   common::ObArray<int64_t> workers_load; | ||||
| @ -2650,12 +2648,31 @@ int ObPxAffinityByRandom::do_random(bool use_partition_info) | ||||
|         && (tablet_hash_values_.at(0).tablet_idx_ > tablet_hash_values_.at(1).tablet_idx_)) { | ||||
|       asc_order = false; | ||||
|     } | ||||
|     // In a partition-wise affinity scenario, the partition_idx of paired partitions may differ. | ||||
|     // For example, T1 consists of p0, p1, p2 and T2 consists of p1, p2, with | ||||
|     // T1.p1 <===> T2.p1 and T1.p2 <===> T2.p2. | ||||
|     // The partition_idx of T1.p1 is 1, while the partition_idx of T2.p1 is 0. | ||||
|     // If we hashed partition_idx directly and sorted partitions by that hash value, | ||||
|     // T1.p1 and T2.p1 could be assigned to different workers. | ||||
|     // So we first sort partitions by partition_idx and assign each a relative_idx starting from zero, | ||||
|     // then compute the hash value from the relative_idx. | ||||
|     auto part_idx_compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.tablet_idx_ > b.tablet_idx_; }; | ||||
|     std::sort(tablet_hash_values_.begin(), | ||||
|               tablet_hash_values_.end(), | ||||
|               part_idx_compare_fun); | ||||
|     int64_t relative_idx = 0; | ||||
|     for (int64_t i = 0; i < tablet_hash_values_.count(); i++) { | ||||
|       uint64_t value = ((tenant_id << 32) | relative_idx); | ||||
|       tablet_hash_values_.at(i).hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt_); | ||||
|       relative_idx++; | ||||
|     } | ||||
|  | ||||
|     // 先打乱所有的序 | ||||
|     auto compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.hash_value_ > b.hash_value_; }; | ||||
|     std::sort(tablet_hash_values_.begin(), | ||||
|               tablet_hash_values_.end(), | ||||
|               compare_fun); | ||||
|     LOG_TRACE("after sort partition_hash_values randomly", K(tablet_hash_values_)); | ||||
|  | ||||
|     // 如果没有partition的统计信息则将它们round放置 | ||||
|     if (!use_partition_info) { | ||||
| @ -3406,13 +3423,13 @@ int ObSlaveMapUtil::build_ppwj_ch_mn_map(ObExecContext &ctx, ObDfo &parent, ObDf | ||||
|         } else if (OB_FAIL(affinitize_rule.add_partition(location.tablet_id_.id(), | ||||
|                 tablet_idx, | ||||
|                 sqc.get_task_count(), | ||||
|                 ctx.get_my_session()->get_effective_tenant_id(), | ||||
|                 partition_row_info))) { | ||||
|           LOG_WARN("fail calc task_id", K(location.tablet_id_), K(sqc), K(ret)); | ||||
|         } | ||||
|       } | ||||
|       if (OB_FAIL(ret)) { | ||||
|       } else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty()))) { | ||||
|       } else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty(), | ||||
|                          ctx.get_my_session()->get_effective_tenant_id()))) { | ||||
|         LOG_WARN("failed to do random", K(ret)); | ||||
|       } else { | ||||
|         const ObIArray<ObPxAffinityByRandom::TabletHashValue> &partition_worker_pairs = | ||||
|  | ||||
| @ -443,9 +443,8 @@ public: | ||||
|   int add_partition(int64_t tablet_id, | ||||
|       int64_t tablet_idx, | ||||
|       int64_t worker_cnt, | ||||
|       uint64_t tenant_id, | ||||
|       ObPxTabletInfo &partition_row_info); | ||||
|   int do_random(bool use_partition_info); | ||||
|   int do_random(bool use_partition_info, uint64_t tenant_id); | ||||
|   const ObIArray<TabletHashValue> &get_result() { return tablet_hash_values_; } | ||||
|   static int get_tablet_info(int64_t tablet_id, ObIArray<ObPxTabletInfo> &partitions_info, ObPxTabletInfo &partition_info); | ||||
| private: | ||||
|  | ||||
| @ -52,18 +52,18 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { | ||||
|     ObPxAffinityByRandom affinitize_rule; | ||||
|     for (int i = 0; i < 5; ++i) { | ||||
|       px_part_info.physical_row_count_ = (10 - i) * 100; | ||||
|       affinitize_rule.add_partition(i,i,parallel,tenant_id,px_part_info); | ||||
|       affinitize_rule.add_partition(i,i,parallel,px_part_info); | ||||
|     } | ||||
|     affinitize_rule.do_random(true); | ||||
|     affinitize_rule.do_random(true, tenant_id); | ||||
|     const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result(); | ||||
|     for (int i = 0; i < result.count(); ++i) { | ||||
|       LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_)); | ||||
|     } | ||||
|     ASSERT_EQ(1, result.at(0).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(2).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(4).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(1, result.at(4).worker_id_); | ||||
|   } | ||||
|  | ||||
|   { | ||||
| @ -73,27 +73,27 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { | ||||
|     ObPxAffinityByRandom affinitize_rule; | ||||
|  | ||||
|     px_part_info.physical_row_count_ = 3000; | ||||
|     affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(0,0,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 1000; | ||||
|     affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(1,1,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 2500; | ||||
|     affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(2,2,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 3500; | ||||
|     affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(3,3,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 2000; | ||||
|     affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(4,4,parallel,px_part_info); | ||||
|  | ||||
|     affinitize_rule.do_random(true); | ||||
|     affinitize_rule.do_random(true, tenant_id); | ||||
|  | ||||
|     const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result(); | ||||
|     for (int i = 0; i < 5; ++i) { | ||||
|       LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_)); | ||||
|     } | ||||
|     ASSERT_EQ(3, result.at(0).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(4, result.at(0).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(1, result.at(2).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(4, result.at(4).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(3, result.at(4).worker_id_); | ||||
|   } | ||||
|  | ||||
|   { | ||||
| @ -103,17 +103,17 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { | ||||
|     ObPxAffinityByRandom affinitize_rule; | ||||
|  | ||||
|     px_part_info.physical_row_count_ = 3000; | ||||
|     affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(0,0,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 4000; | ||||
|     affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(1,1,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 2500; | ||||
|     affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(2,2,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 1500; | ||||
|     affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(3,3,parallel,px_part_info); | ||||
|     px_part_info.physical_row_count_ = 2000; | ||||
|     affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info); | ||||
|     affinitize_rule.add_partition(4,4,parallel,px_part_info); | ||||
|  | ||||
|     affinitize_rule.do_random(true); | ||||
|     affinitize_rule.do_random(true, tenant_id); | ||||
|  | ||||
|     const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result(); | ||||
|     for (int i = 0; i < 5; ++i) { | ||||
| @ -121,10 +121,10 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) { | ||||
|     } | ||||
|  | ||||
|     ASSERT_EQ(1, result.at(0).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(1).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(2).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(0, result.at(4).worker_id_); | ||||
|     ASSERT_EQ(2, result.at(3).worker_id_); | ||||
|     ASSERT_EQ(1, result.at(4).worker_id_); | ||||
|  } | ||||
|  | ||||
| } | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 sdc
					sdc