Do not perform partition randomization in affinity GI.

This commit is contained in:
obdev
2024-07-19 10:54:34 +00:00
committed by ob-robot
parent 18125dd373
commit eef6f2b442
7 changed files with 51 additions and 8 deletions

View File

@ -67,7 +67,8 @@ OB_SERIALIZE_MEMBER(ObPxSqcMeta,
p2p_dh_map_info_,
sqc_count_,
monitoring_info_,
branch_id_base_);
branch_id_base_,
partition_random_affinitize_);
OB_SERIALIZE_MEMBER(ObPxTask,
qc_id_,
dfo_id_,
@ -186,6 +187,7 @@ int ObPxSqcMeta::assign(const ObPxSqcMeta &other)
px_detectable_ids_ = other.px_detectable_ids_;
interrupt_by_dm_ = other.interrupt_by_dm_;
sqc_count_ = other.sqc_count_;
partition_random_affinitize_ = other.partition_random_affinitize_;
}
access_external_table_files_.reuse();
for (int i = 0; OB_SUCC(ret) && i < other.access_external_table_files_.count(); i++) {

View File

@ -365,6 +365,14 @@ public:
int64_t get_sqc_count() const { return sqc_count_;}
void set_sqc_order_gi_tasks(bool v) { sqc_order_gi_tasks_ = v; }
bool sqc_order_gi_tasks() const { return sqc_order_gi_tasks_; }
// Stores the flag that says whether partition randomization is done when GI
// tasks are split; the value is kept in partition_random_affinitize_.
inline void set_partition_random_affinitize(bool partition_random_affinitize)
{
partition_random_affinitize_ = partition_random_affinitize;
}
// Returns the stored partition_random_affinitize_ flag (whether partition
// randomization is done when GI tasks are split).
inline bool partition_random_affinitize() const
{
return partition_random_affinitize_;
}
ObQCMonitoringInfo &get_monitoring_info() { return monitoring_info_; }
const ObQCMonitoringInfo &get_monitoring_info() const { return monitoring_info_; }
void set_branch_id_base(const int16_t branch_id_base) { branch_id_base_ = branch_id_base; }
@ -447,6 +455,7 @@ private:
ObP2PDhMapInfo p2p_dh_map_info_;
int64_t sqc_count_;
bool sqc_order_gi_tasks_;
bool partition_random_affinitize_{true}; // whether do partition random in gi task split
};
class ObDfo
@ -684,6 +693,15 @@ public:
ObP2PDhMapInfo &get_p2p_dh_map_info() { return p2p_dh_map_info_;};
bool force_bushy() { return force_bushy_; }
void set_force_bushy(bool flag) { force_bushy_ = flag; }
// Stores the flag that says whether partition randomization is done when GI
// tasks are split; the value is kept in partition_random_affinitize_.
inline void set_partition_random_affinitize(bool partition_random_affinitize)
{
partition_random_affinitize_ = partition_random_affinitize;
}
// Returns the stored partition_random_affinitize_ flag (whether partition
// randomization is done when GI tasks are split).
inline bool partition_random_affinitize() const
{
return partition_random_affinitize_;
}
TO_STRING_KV(K_(execution_id),
K_(dfo_id),
K_(is_active),
@ -790,6 +808,7 @@ private:
// ---------------
ObPxCoordInfo *coord_info_ptr_;
bool force_bushy_;
bool partition_random_affinitize_{true}; // whether do partition random in gi task split
};

View File

@ -465,10 +465,15 @@ int ObDfoMgr::do_split(ObExecContext &exec_ctx,
ObPxCoordInfo &px_coord_info) const
{
int ret = OB_SUCCESS;
bool partition_random_affinitize = true;
bool top_px = (nullptr == parent_dfo);
bool got_fulltree_dfo = false;
ObDfo *dfo = NULL;
bool is_stack_overflow = false;
if (OB_NOT_NULL(phy_op->get_phy_plan())
&& phy_op->get_phy_plan()->get_min_cluster_version() >= CLUSTER_VERSION_4_3_3_0) {
partition_random_affinitize = false;
}
if (OB_FAIL(check_stack_overflow(is_stack_overflow))) {
LOG_WARN("failed to check stack overflow", K(ret));
} else if (is_stack_overflow) {
@ -609,6 +614,7 @@ int ObDfoMgr::do_split(ObExecContext &exec_ctx,
dfo->set_dop(1);
dfo->set_execution_id(exec_ctx.get_my_session()->get_current_execution_id());
dfo->set_px_sequence_id(dfo_int_gen.get_px_sequence_id());
dfo->set_partition_random_affinitize(partition_random_affinitize);
if (OB_NOT_NULL(phy_op->get_phy_plan()) && phy_op->get_phy_plan()->is_enable_px_fast_reclaim()) {
ObDetectableId sqc_detectable_id;
// if generate_detectable_id failed, means that server id is not ready

View File

@ -1077,14 +1077,17 @@ int ObAffinitizeGranuleSplitter::split_tasks_affinity(ObExecContext &ctx,
ObPxTabletInfo partition_row_info;
ObTabletIdxMap idx_map;
bool qc_order_gi_tasks = false;
bool partition_random_affinitize = true;
if (OB_ISNULL(my_session = GET_MY_SESSION(ctx)) || OB_ISNULL(ctx.get_sqc_handler())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get my session", K(ret), K(my_session), K(ctx.get_sqc_handler()));
} else {
qc_order_gi_tasks = ctx.get_sqc_handler()->get_sqc_init_arg().qc_order_gi_tasks_;
partition_random_affinitize =
ctx.get_sqc_handler()->get_sqc_init_arg().sqc_.partition_random_affinitize();
}
int64_t cur_idx = -1;
ObPxAffinityByRandom affinitize_rule(qc_order_gi_tasks);
ObPxAffinityByRandom affinitize_rule(qc_order_gi_tasks, partition_random_affinitize);
ARRAY_FOREACH_X(taskset.gi_task_set_, idx, cnt, OB_SUCC(ret)) {
if (cur_idx != taskset.gi_task_set_.at(idx).idx_) {
cur_idx = taskset.gi_task_set_.at(idx).idx_; // get all different parition key in Affinitize

View File

@ -524,6 +524,7 @@ int ObPXServerAddrUtil::build_dfo_sqc(ObExecContext &ctx,
sqc.set_parent_dfo_id(dfo.get_parent_dfo_id());
sqc.set_single_tsc_leaf_dfo(dfo.is_single_tsc_leaf_dfo());
sqc.get_monitoring_info().init(ctx);
sqc.set_partition_random_affinitize(dfo.partition_random_affinitize());
if (OB_SUCC(ret)) {
if (!dfo.get_p2p_dh_map_info().is_empty()) {
if (OB_FAIL(sqc.get_p2p_dh_map_info().assign(dfo.get_p2p_dh_map_info()))) {
@ -640,6 +641,7 @@ int ObPXServerAddrUtil::alloc_by_temp_child_distribution_inner(ObExecContext &ex
sqc.set_qc_server_id(child.get_qc_server_id());
sqc.set_parent_dfo_id(child.get_parent_dfo_id());
sqc.get_monitoring_info().init(exec_ctx);
sqc.set_partition_random_affinitize(child.partition_random_affinitize());
if (OB_SUCC(ret)) {
if (!child.get_p2p_dh_map_info().is_empty()) {
if (OB_FAIL(sqc.get_p2p_dh_map_info().assign(child.get_p2p_dh_map_info()))) {
@ -728,6 +730,7 @@ int ObPXServerAddrUtil::alloc_by_child_distribution(const ObDfo &child, ObDfo &p
sqc.set_qc_server_id(parent.get_qc_server_id());
sqc.set_parent_dfo_id(parent.get_parent_dfo_id());
sqc.get_monitoring_info().assign(child_sqc.get_monitoring_info());
sqc.set_partition_random_affinitize(child.partition_random_affinitize());
if (!parent.get_p2p_dh_map_info().is_empty()) {
if (OB_FAIL(sqc.get_p2p_dh_map_info().assign(parent.get_p2p_dh_map_info()))) {
LOG_WARN("fail to assign p2p dh map info", K(ret));
@ -823,6 +826,7 @@ int ObPXServerAddrUtil::alloc_by_random_distribution(ObExecContext &exec_ctx,
sqc.set_qc_server_id(parent.get_qc_server_id());
sqc.set_parent_dfo_id(parent.get_parent_dfo_id());
sqc.get_monitoring_info().init(exec_ctx);
sqc.set_partition_random_affinitize(parent.partition_random_affinitize());
if (OB_SUCC(ret)) {
if (!parent.get_p2p_dh_map_info().is_empty()) {
if (OB_FAIL(sqc.get_p2p_dh_map_info().assign(parent.get_p2p_dh_map_info()))) {
@ -872,6 +876,7 @@ int ObPXServerAddrUtil::alloc_by_local_distribution(ObExecContext &exec_ctx,
sqc.set_parent_dfo_id(dfo.get_parent_dfo_id());
sqc.set_qc_server_id(dfo.get_qc_server_id());
sqc.get_monitoring_info().init(exec_ctx);
sqc.set_partition_random_affinitize(dfo.partition_random_affinitize());
if (!dfo.get_p2p_dh_map_info().is_empty()) {
OZ(sqc.get_p2p_dh_map_info().assign(dfo.get_p2p_dh_map_info()));
}
@ -2745,12 +2750,16 @@ int ObPxAffinityByRandom::do_random(bool use_partition_info, uint64_t tenant_id)
}
}
if (partition_random_affinitize_) {
// First shuffle the overall order
auto compare_fun = [](TabletHashValue a, TabletHashValue b) -> bool { return a.hash_value_ > b.hash_value_; };
lib::ob_sort(tablet_hash_values_.begin(),
tablet_hash_values_.end(),
compare_fun);
LOG_TRACE("after sort partition_hash_values randomly", K(tablet_hash_values_), K(this), K(order_partitions_));
} else {
// do nothing
}
// If there is no partition statistics info, place them round-robin
if (!use_partition_info) {
@ -3472,7 +3481,8 @@ int ObSlaveMapUtil::build_ppwj_ch_mn_map(ObExecContext &ctx, ObDfo &parent, ObDf
ARRAY_FOREACH_X(sqcs, idx, cnt, OB_SUCC(ret)) {
// All affinitize computation is local to each SQC, not global.
ObPxSqcMeta &sqc = *sqcs.at(idx);
ObPxAffinityByRandom affinitize_rule(sqc.sqc_order_gi_tasks());
ObPxAffinityByRandom affinitize_rule(sqc.sqc_order_gi_tasks(),
sqc.partition_random_affinitize());
LOG_TRACE("build ppwj_ch_mn_map", K(sqc));
ObPxTabletInfo partition_row_info;
locations.reset();

View File

@ -443,8 +443,10 @@ public:
TO_STRING_KV(K_(tablet_id), K_(tablet_idx), K_(hash_value), K_(worker_id), K_(partition_info));
};
public:
ObPxAffinityByRandom(bool order_partitions) :
worker_cnt_(0), tablet_hash_values_(), order_partitions_(order_partitions) {}
// Builds an affinity rule with no workers and no tablets registered yet.
//   order_partitions            - stored in order_partitions_.
//   partition_random_affinitize - stored in partition_random_affinitize_; when
//                                 true, do_random() sorts tablet_hash_values_
//                                 by hash_value_, otherwise that sort is skipped.
ObPxAffinityByRandom(bool order_partitions, bool partition_random_affinitize)
: worker_cnt_(0), tablet_hash_values_(), order_partitions_(order_partitions),
partition_random_affinitize_(partition_random_affinitize)
{}
virtual ~ObPxAffinityByRandom() = default;
int reserve(int64_t size) { return tablet_hash_values_.reserve(size); }
int add_partition(int64_t tablet_id,
@ -459,6 +461,7 @@ private:
int64_t worker_cnt_;
ObSEArray<TabletHashValue, 8> tablet_hash_values_;
bool order_partitions_;
bool partition_random_affinitize_;// whether do partition random in gi task split
};
class ObSlaveMapUtil

View File

@ -49,7 +49,7 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
int64_t parallel = 3;
int64_t tenant_id = 1;
ObPxTabletInfo px_part_info;
ObPxAffinityByRandom affinitize_rule(true);
ObPxAffinityByRandom affinitize_rule(true, true);
for (int i = 0; i < 5; ++i) {
px_part_info.physical_row_count_ = (10 - i) * 100;
affinitize_rule.add_partition(i,i,parallel,tenant_id,px_part_info);
@ -70,7 +70,7 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
int64_t parallel = 16;
int64_t tenant_id = 1;
ObPxTabletInfo px_part_info;
ObPxAffinityByRandom affinitize_rule(true);
ObPxAffinityByRandom affinitize_rule(true, true);
px_part_info.physical_row_count_ = 3000;
affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info);
@ -100,7 +100,7 @@ TEST_F(ObRandomAffiTaskSplitTest, split_task_test) {
int64_t parallel = 3;
int64_t tenant_id = 1;
ObPxTabletInfo px_part_info;
ObPxAffinityByRandom affinitize_rule(true);
ObPxAffinityByRandom affinitize_rule(true, true);
px_part_info.physical_row_count_ = 3000;
affinitize_rule.add_partition(0,0,parallel,tenant_id,px_part_info);