[CP] Fix correctness problem caused by GI splitting tasks randomly

This commit is contained in:
sdc
2023-09-20 14:15:14 +00:00
committed by ob-robot
parent cc74381bb9
commit db195563f0
4 changed files with 50 additions and 34 deletions

View File

@ -1096,14 +1096,14 @@ int ObAffinitizeGranuleSplitter::split_tasks_affinity(ObExecContext &ctx,
} else if (OB_FAIL(affinitize_rule.add_partition(tablet_loc.tablet_id_.id(),
tablet_idx,
parallelism,
my_session->get_effective_tenant_id(),
partition_row_info))) {
LOG_WARN("Failed to get affinitize taskid" , K(ret));
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty()))) {
} else if (OB_FAIL(affinitize_rule.do_random(!partitions_info_.empty(),
my_session->get_effective_tenant_id()))) {
LOG_WARN("failed to do random", K(ret));
} else {
const ObIArray<ObPxAffinityByRandom::TabletHashValue> &partition_worker_pairs = affinitize_rule.get_result();

View File

@ -2607,7 +2607,6 @@ int ObPxChannelUtil::sqcs_channles_asyn_wait(ObIArray<ObPxSqcMeta *> &sqcs)
int ObPxAffinityByRandom::add_partition(int64_t tablet_id,
int64_t tablet_idx,
int64_t worker_cnt,
uint64_t tenant_id,
ObPxTabletInfo &partition_row_info)
{
int ret = OB_SUCCESS;
@ -2616,8 +2615,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id,
LOG_WARN("The worker cnt is invalid", K(ret), K(worker_cnt));
} else {
TabletHashValue part_hash_value;
uint64_t value = (tenant_id << 32 | tablet_idx);
part_hash_value.hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt);
part_hash_value.hash_value_ = 0;
part_hash_value.tablet_idx_ = tablet_idx;
part_hash_value.tablet_id_ = tablet_id;
part_hash_value.partition_info_ = partition_row_info;
@ -2629,7 +2627,7 @@ int ObPxAffinityByRandom::add_partition(int64_t tablet_id,
return ret;
}
int ObPxAffinityByRandom::do_random(bool use_partition_info)
int ObPxAffinityByRandom::do_random(bool use_partition_info, uint64_t tenant_id)
{
int ret = OB_SUCCESS;
common::ObArray<int64_t> workers_load;
@ -2650,12 +2648,31 @@ int ObPxAffinityByRandom::do_random(bool use_partition_info)
&& (tablet_hash_values_.at(0).tablet_idx_ > tablet_hash_values_.at(1).tablet_idx_)) {
asc_order = false;
}
// In the partition-wise affinity scenario, the partition_idx values of a pair of matching partitions may differ.
// For example, T1 consists of p0, p1, p2 and T2 consists of p1, p2,
// where T1.p1 <===> T2.p1 and T1.p2 <===> T2.p2.
// The partition_idx of T1.p1 is 1, while the partition_idx of T2.p1 is 0.
// If we calculated the hash value from partition_idx and sorted partitions by that hash value,
// T1.p1 and T2.p1 might be assigned to different workers.
// So we sort partitions by partition_idx (in descending order) and generate a relative_idx that starts from zero,
// then calculate the hash value from the relative_idx.
auto part_idx_compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.tablet_idx_ > b.tablet_idx_; };
std::sort(tablet_hash_values_.begin(),
tablet_hash_values_.end(),
part_idx_compare_fun);
int64_t relative_idx = 0;
for (int64_t i = 0; i < tablet_hash_values_.count(); i++) {
uint64_t value = ((tenant_id << 32) | relative_idx);
tablet_hash_values_.at(i).hash_value_ = common::murmurhash(&value, sizeof(value), worker_cnt_);
relative_idx++;
}
// First, shuffle the order of all partitions (by sorting on their hash values).
auto compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.hash_value_ > b.hash_value_; };
std::sort(tablet_hash_values_.begin(),
tablet_hash_values_.end(),
compare_fun);
LOG_TRACE("after sort partition_hash_values randomly", K(tablet_hash_values_));
// If there is no partition statistics information, place the partitions in round-robin fashion.
if (!use_partition_info) {
@ -3406,13 +3423,13 @@ int ObSlaveMapUtil::build_ppwj_ch_mn_map(ObExecContext &ctx, ObDfo &parent, ObDf
} else if (OB_FAIL(affinitize_rule.add_partition(location.tablet_id_.id(),
tablet_idx,
sqc.get_task_count(),
ctx.get_my_session()->get_effective_tenant_id(),
partition_row_info))) {
LOG_WARN("fail calc task_id", K(location.tablet_id_), K(sqc), K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty()))) {
} else if (OB_FAIL(affinitize_rule.do_random(!sqc.get_partitions_info().empty(),
ctx.get_my_session()->get_effective_tenant_id()))) {
LOG_WARN("failed to do random", K(ret));
} else {
const ObIArray<ObPxAffinityByRandom::TabletHashValue> &partition_worker_pairs =

View File

@ -443,9 +443,8 @@ public:
int add_partition(int64_t tablet_id,
int64_t tablet_idx,
int64_t worker_cnt,
uint64_t tenant_id,
ObPxTabletInfo &partition_row_info);
int do_random(bool use_partition_info);
int do_random(bool use_partition_info, uint64_t tenant_id);
const ObIArray<TabletHashValue> &get_result() { return tablet_hash_values_; }
static int get_tablet_info(int64_t tablet_id, ObIArray<ObPxTabletInfo> &partitions_info, ObPxTabletInfo &partition_info);
private:

View File

@ -52,18 +52,18 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
ObPxAffinityByRandom affinitize_rule;
for (int i = 0; i < 5; ++i) {
px_part_info.physical_row_count_ = (10 - i) * 100;
affinitize_rule.add_partition(i,i,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(i,i,parallel,px_part_info);
}
affinitize_rule.do_random(true);
affinitize_rule.do_random(true, tenant_id);
const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result();
for (int i = 0; i < result.count(); ++i) {
LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_));
}
ASSERT_EQ(1, result.at(0).worker_id_);
ASSERT_EQ(2, result.at(1).worker_id_);
ASSERT_EQ(0, result.at(1).worker_id_);
ASSERT_EQ(2, result.at(2).worker_id_);
ASSERT_EQ(0, result.at(3).worker_id_);
ASSERT_EQ(0, result.at(4).worker_id_);
ASSERT_EQ(2, result.at(3).worker_id_);
ASSERT_EQ(1, result.at(4).worker_id_);
}
{
@ -73,27 +73,27 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
ObPxAffinityByRandom affinitize_rule;
px_part_info.physical_row_count_ = 3000;
affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(0,0,parallel,px_part_info);
px_part_info.physical_row_count_ = 1000;
affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(1,1,parallel,px_part_info);
px_part_info.physical_row_count_ = 2500;
affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(2,2,parallel,px_part_info);
px_part_info.physical_row_count_ = 3500;
affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(3,3,parallel,px_part_info);
px_part_info.physical_row_count_ = 2000;
affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(4,4,parallel,px_part_info);
affinitize_rule.do_random(true);
affinitize_rule.do_random(true, tenant_id);
const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result();
for (int i = 0; i < 5; ++i) {
LOG_INFO("result", K(result.at(i).tablet_id_), K(result.at(i).worker_id_), K(result.at(i).partition_info_.physical_row_count_));
}
ASSERT_EQ(3, result.at(0).worker_id_);
ASSERT_EQ(0, result.at(1).worker_id_);
ASSERT_EQ(4, result.at(0).worker_id_);
ASSERT_EQ(2, result.at(1).worker_id_);
ASSERT_EQ(1, result.at(2).worker_id_);
ASSERT_EQ(2, result.at(3).worker_id_);
ASSERT_EQ(4, result.at(4).worker_id_);
ASSERT_EQ(0, result.at(3).worker_id_);
ASSERT_EQ(3, result.at(4).worker_id_);
}
{
@ -103,17 +103,17 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
ObPxAffinityByRandom affinitize_rule;
px_part_info.physical_row_count_ = 3000;
affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(0,0,parallel,px_part_info);
px_part_info.physical_row_count_ = 4000;
affinitize_rule.add_partition(1,1,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(1,1,parallel,px_part_info);
px_part_info.physical_row_count_ = 2500;
affinitize_rule.add_partition(2,2,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(2,2,parallel,px_part_info);
px_part_info.physical_row_count_ = 1500;
affinitize_rule.add_partition(3,3,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(3,3,parallel,px_part_info);
px_part_info.physical_row_count_ = 2000;
affinitize_rule.add_partition(4,4,parallel,tenant_id,px_part_info);
affinitize_rule.add_partition(4,4,parallel,px_part_info);
affinitize_rule.do_random(true);
affinitize_rule.do_random(true, tenant_id);
const common::ObIArray<ObPxAffinityByRandom::TabletHashValue>& result = affinitize_rule.get_result();
for (int i = 0; i < 5; ++i) {
@ -121,10 +121,10 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
}
ASSERT_EQ(1, result.at(0).worker_id_);
ASSERT_EQ(2, result.at(1).worker_id_);
ASSERT_EQ(0, result.at(1).worker_id_);
ASSERT_EQ(2, result.at(2).worker_id_);
ASSERT_EQ(0, result.at(3).worker_id_);
ASSERT_EQ(0, result.at(4).worker_id_);
ASSERT_EQ(2, result.at(3).worker_id_);
ASSERT_EQ(1, result.at(4).worker_id_);
}
}