diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 11e9a96117..1828edabbe 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -851,7 +851,11 @@ const char *const OB_DIAG_TENANT_NAME = "diag"; //for sync ddl (ClusterID_TenantID_SchemaVersion) const char *const OB_DDL_ID_VAR_NAME = "__oceanbase_ddl_id"; const int64_t OB_MAX_DDL_ID_STR_LENGTH = 64; +#ifdef ERRSIM +const int64_t OB_MAX_DDL_SINGLE_REPLICA_BUILD_TIMEOUT = 30L * 60L * 1000L * 1000L; // 30 minutes +#else const int64_t OB_MAX_DDL_SINGLE_REPLICA_BUILD_TIMEOUT = 7L * 24L * 60L * 60L * 1000L * 1000L; // 7days +#endif const int64_t OB_MAX_PARTITION_SHARDING_LENGTH = 10; diff --git a/deps/oblib/src/lib/profile/ob_trace_id.h b/deps/oblib/src/lib/profile/ob_trace_id.h index 4c5a876930..da7e919d13 100644 --- a/deps/oblib/src/lib/profile/ob_trace_id.h +++ b/deps/oblib/src/lib/profile/ob_trace_id.h @@ -52,6 +52,7 @@ struct ObCurTraceId id_.is_user_request_ = 0; id_.is_ipv6_ = ip_port.using_ipv6(); id_.reserved_ = 0; + id_.sub_task_ = 0; id_.port_ = static_cast<uint16_t>(ip_port.get_port()); if (ip_port.using_ipv6()) { id_.ipv6_[0] = ip_port.get_ipv6_low(); @@ -157,6 +158,11 @@ struct ObCurTraceId return ret; } + inline void set_sub_id(const int32_t sub_id) + { + id_.sub_task_ = sub_id & ((1 << 12) - 1); // sub_task_ is a 12-bit field: keep only the low 12 bits of sub_id + } + inline int64_t hash() const { int64_t hash_value = 0; @@ -181,8 +187,9 @@ struct ObCurTraceId uint32_t ip_: 32; uint16_t port_: 16; uint8_t is_user_request_: 1; - uint8_t is_ipv6_:1; - uint16_t reserved_: 14; + uint8_t is_ipv6_: 1; + uint16_t reserved_: 2; + uint16_t sub_task_: 12; uint64_t seq_: 64; uint64_t ipv6_[2]; } id_; @@ -214,6 +221,14 @@ struct ObCurTraceId } } + inline static void set_sub_id(const int32_t sub_id) + { + TraceId *trace_id = get_trace_id(); + if (NULL != trace_id) { + trace_id->set_sub_id(sub_id); + } + } + inline static void reset() { TraceId *trace_id = get_trace_id(); diff --git a/mittest/simple_server/CMakeLists.txt 
b/mittest/simple_server/CMakeLists.txt index 1b9a670aa9..15083d1334 100644 --- a/mittest/simple_server/CMakeLists.txt +++ b/mittest/simple_server/CMakeLists.txt @@ -88,6 +88,7 @@ ob_unittest_observer(test_big_tx_data test_big_tx_data.cpp) ob_unittest_observer(test_fast_commit_report fast_commit_report.cpp) #ob_unittest_observer(test_mvcc_gc test_mvcc_gc.cpp) ob_unittest_observer(test_ob_simple_rto test_ob_simple_rto.cpp) +ob_unittest_observer(test_ddl_task test_ddl_task.cpp) ob_unittest_observer(test_all_virtual_proxy_partition_info_default_value test_all_virtual_proxy_partition_info_default_value.cpp) ob_unittest_observer(test_get_stopped_zone_list test_get_stopped_zone_list.cpp) ob_unittest_observer(test_lock_table_with_tx test_lock_table_with_tx.cpp) diff --git a/mittest/simple_server/test_ddl_task.cpp b/mittest/simple_server/test_ddl_task.cpp new file mode 100644 index 0000000000..4d094124a7 --- /dev/null +++ b/mittest/simple_server/test_ddl_task.cpp @@ -0,0 +1,100 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include <gtest/gtest.h> +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_simple_cluster_test_base.h" +#include "lib/mysqlclient/ob_mysql_result.h" +#include "rootserver/ddl_task/ob_ddl_task.h" + +namespace oceanbase +{ +namespace unittest +{ + +using namespace oceanbase::common; +using namespace oceanbase::storage; +using namespace oceanbase::rootserver; + + +class ObDDLTaskTest : public ObSimpleClusterTestBase +{ +public: + ObDDLTaskTest() : ObSimpleClusterTestBase("test_ddl_task_") {} +}; + +TEST_F(ObDDLTaskTest, create_tenant) +{ + uint64_t tenant_id = 0; // start from 0 so the ASSERT_NE below proves get_tenant_id() really wrote it + ASSERT_EQ(OB_SUCCESS, create_tenant()); + ASSERT_EQ(OB_SUCCESS, get_tenant_id(tenant_id)); + ASSERT_NE(0, tenant_id); + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); +} + +class MockDDLTask : public ObDDLTask +{ +public: + MockDDLTask() : ObDDLTask(DDL_INVALID) { } + virtual int process() { return OB_SUCCESS; } + virtual int cleanup_impl() { return OB_SUCCESS; } + virtual void flt_set_task_span_tag() const { } + virtual void flt_set_status_span_tag() const { } +}; + +TEST_F(ObDDLTaskTest, switch_status) +{ + common::ObMySQLProxy &tenant_sql_proxy = get_curr_simple_server().get_sql_proxy2(); + ObArenaAllocator arena; + uint64_t tenant_id = 0; + ASSERT_EQ(OB_SUCCESS, get_tenant_id(tenant_id)); + MockDDLTask task; + ObDDLTaskRecord task_record; + task.ddl_stmt_str_ = "create index xxx"; + task.gmt_create_ = ObTimeUtility::current_time(); + task.tenant_id_ = tenant_id; + task.object_id_ = 1; + task.target_object_id_ = 1; + task.schema_version_ = 1; + task.task_type_ = DDL_CREATE_INDEX; + task.task_status_ = ObDDLTaskStatus::PREPARE; + task.task_id_ = 1; + task.parent_task_id_ = 0; + task.task_version_ = 1; + task.execution_id_ = 1; + task.ret_code_ = OB_SUCCESS; + task.trace_id_.id_.seq_ = 1; + ASSERT_EQ(OB_SUCCESS, task.convert_to_record(task_record, arena)); + ASSERT_EQ(OB_SUCCESS, ObDDLTaskRecordOperator::insert_record(*GCTX.sql_proxy_, 
task_record)); + ASSERT_EQ(OB_SUCCESS, task.switch_status(ObDDLTaskStatus::DROP_SCHEMA, false/*enable_flt*/, OB_SUCCESS)); +} + +TEST_F(ObDDLTaskTest, end) +{ + // used for debug +// ::sleep(3600); +} + +} +} + +int main(int argc, char **argv) +{ + oceanbase::unittest::init_log_and_gtest(argc, argv); + OB_LOGGER.set_log_level("INFO"); + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index fbd2bef59f..960bfb282d 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -301,6 +301,7 @@ ob_set_subtarget(ob_server virtual_table virtual_table/ob_all_virtual_table_mgr.cpp virtual_table/ob_all_virtual_unit.cpp virtual_table/ob_all_virtual_tablet_ddl_kv_info.cpp + virtual_table/ob_all_virtual_ddl_sim_point_stat.cpp virtual_table/ob_all_virtual_tablet_info.cpp virtual_table/ob_all_virtual_server.cpp virtual_table/ob_all_virtual_tablet_compaction_history.cpp diff --git a/src/observer/ob_inner_sql_connection.cpp b/src/observer/ob_inner_sql_connection.cpp index 7c3e74ed38..6d1f212c57 100644 --- a/src/observer/ob_inner_sql_connection.cpp +++ b/src/observer/ob_inner_sql_connection.cpp @@ -33,6 +33,7 @@ #include "observer/ob_server_struct.h" #include "observer/virtual_table/ob_virtual_table_iterator_factory.h" #include "observer/ob_req_time_service.h" +#include "observer/ob_server_event_history_table_operator.h" #include "ob_inner_sql_connection_pool.h" #include "ob_inner_sql_read_context.h" #include "ob_inner_sql_result.h" @@ -1458,6 +1459,16 @@ int ObInnerSQLConnection::execute_write_inner(const uint64_t tenant_id, const Ob } else if (OB_FAIL(res.close())) { LOG_WARN("close result set failed", K(ret), K(tenant_id), K(sql)); } + if (get_session().get_ddl_info().is_ddl()) { + SERVER_EVENT_ADD( + "ddl", "local execute ddl inner sql", + "tenant_id", tenant_id, + "trace_id", *ObCurTraceId::get_trace_id(), + "ret", ret, + "affected_rows", affected_rows, + "start_ts", 
res.execute_start_ts_, + "end_ts", res.execute_end_ts_); + } } else if (is_resource_conn()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("resource_conn of resource_svr still doesn't has the tenant resource", @@ -1534,6 +1545,16 @@ int ObInnerSQLConnection::execute_write_inner(const uint64_t tenant_id, const Ob || ObStmt::is_savepoint_stmt(handler->get_result()->get_stmt_type()); get_session().set_has_exec_inner_dml(dml_or_savepoint); } + if (get_session().get_ddl_info().is_ddl()) { + SERVER_EVENT_ADD( + "ddl", "send ddl inner sql", + "tenant_id", tenant_id, + "trace_id", *ObCurTraceId::get_trace_id(), + "ret", ret, + "affected_rows", affected_rows, + "start_ts", res.execute_start_ts_, + "end_ts", res.execute_end_ts_); + } if (OB_SUCC(ret)) { if (OB_FAIL(res.close())) { LOG_WARN("close result set failed", K(ret), K(tenant_id), K(sql)); diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index f755088caf..058bd156e3 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -102,6 +102,7 @@ #include "share/scheduler/ob_dag_warning_history_mgr.h" #include "share/longops_mgr/ob_longops_mgr.h" #include "logservice/palf/election/interface/election.h" +#include "share/ob_ddl_sim_point.h" #include "storage/ddl/ob_ddl_redo_log_writer.h" #include "observer/ob_server_utils.h" #include "observer/table_load/ob_table_load_partition_calc.h" @@ -482,6 +483,10 @@ int ObServer::init(const ObServerOptions &opts, const ObPLogWriterCfg &log_cfg) LOG_ERROR("init server blacklist failed", KR(ret)); } else if (OB_FAIL(ObLongopsMgr::get_instance().init())) { LOG_WARN("init longops mgr fail", KR(ret)); +#ifdef ERRSIM + } else if (OB_FAIL(ObDDLSimPointMgr::get_instance().init())) { + LOG_WARN("init ddl sim point mgr fail", KR(ret)); +#endif } else if (OB_FAIL(ObDDLRedoLogWriter::get_instance().init())) { LOG_WARN("init DDL redo log writer failed", KR(ret)); } diff --git a/src/observer/ob_server_reload_config.cpp b/src/observer/ob_server_reload_config.cpp index 
7bfb1e56e6..b67c2b8e85 100644 --- a/src/observer/ob_server_reload_config.cpp +++ b/src/observer/ob_server_reload_config.cpp @@ -31,6 +31,7 @@ #include "storage/tx_storage/ob_tenant_freezer.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" #include "storage/slog/ob_storage_logger_manager.h" +#include "share/ob_ddl_sim_point.h" using namespace oceanbase::lib; using namespace oceanbase::common; diff --git a/src/observer/ob_service.cpp b/src/observer/ob_service.cpp index c74bb83b3b..c8c2836397 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -71,6 +71,7 @@ #include "storage/tablet/ob_tablet_create_delete_mds_user_data.h" #include "share/backup/ob_backup_path.h" #include "share/backup/ob_backup_connectivity.h" +#include "share/ob_ddl_sim_point.h" // for DDL_SIM #include "storage/backup/ob_backup_utils.h" #include "observer/report/ob_tenant_meta_checker.h"//ObTenantMetaChecker #include "rootserver/backup/ob_backup_task_scheduler.h" // ObBackupTaskScheduler @@ -598,7 +599,9 @@ int ObService::calc_column_checksum_request(const obrpc::ObCalcColumnChecksumReq ObUniqueCheckingDag *dag = NULL; int tmp_ret = OB_SUCCESS; saved_ret = OB_SUCCESS; - if (OB_TMP_FAIL(dag_scheduler->alloc_dag(dag))) { + if (OB_TMP_FAIL(DDL_SIM(tenant_id, arg.task_id_, CALC_COLUMN_CHECKSUM_RPC_SLOW))) { + LOG_WARN("ddl sim failure: calcualte column checksum rpc slow", K(tmp_ret), K(tenant_id), K(arg.task_id_)); + } else if (OB_TMP_FAIL(dag_scheduler->alloc_dag(dag))) { STORAGE_LOG(WARN, "fail to alloc dag", KR(tmp_ret)); } else if (OB_TMP_FAIL(dag->init(arg.tenant_id_, calc_item.ls_id_, @@ -1132,7 +1135,9 @@ int ObService::check_modify_time_elapsed( SCN snapshot_version; ObCheckTransElapsedResult single_result; int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) { + if (OB_TMP_FAIL(DDL_SIM(arg.tenant_id_, arg.ddl_task_id_, CHECK_MODIFY_TIME_ELAPSED_SLOW))) { + LOG_WARN("ddl sim failure: check modify time 
elapsed slow", K(tmp_ret), K(arg.tenant_id_), K(arg.ddl_task_id_)); + } else if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) { LOG_WARN("get ls failed", K(tmp_ret), K(ls_id)); } else if (OB_TMP_FAIL(ls_handle.get_ls()->check_modify_time_elapsed(tablet_id, arg.sstable_exist_ts_, @@ -1185,7 +1190,9 @@ int ObService::check_schema_version_elapsed( const ObTabletID &tablet_id = arg.tablets_.at(i).tablet_id_; ObCheckTransElapsedResult single_result; int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) { + if (OB_TMP_FAIL(DDL_SIM(arg.tenant_id_, arg.ddl_task_id_, CHECK_SCHEMA_TRANS_END_SLOW))) { + LOG_WARN("ddl sim failure: check schema version elapsed slow", K(tmp_ret), K(arg)); + } else if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) { LOG_WARN("get ls failed", K(tmp_ret), K(i), K(ls_id)); } else if (OB_TMP_FAIL(ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle))) { LOG_WARN("fail to get tablet", K(tmp_ret), K(i), K(ls_id), K(tablet_id)); diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index f52baeb559..d55b83bef7 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -92,6 +92,7 @@ #include "share/scheduler/ob_dag_warning_history_mgr.h" #include "storage/compaction/ob_compaction_diagnose.h" #include "share/io/ob_io_manager.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/freeze/ob_major_freeze_service.h" #include "observer/omt/ob_tenant_config_mgr.h" #include "observer/omt/ob_tenant_srs.h" @@ -1284,6 +1285,9 @@ int ObMultiTenant::update_tenant_config(uint64_t tenant_id) if (OB_TMP_FAIL(update_tenant_dag_scheduler_config())) { LOG_WARN("failed to update tenant dag scheduler config", K(tmp_ret), K(tenant_id)); } + if (OB_TMP_FAIL(update_tenant_ddl_config())) { + LOG_WARN("failed to update tenant ddl config", K(tmp_ret), K(tenant_id)); + } if 
(OB_TMP_FAIL(update_tenant_freezer_config_())) { LOG_WARN("failed to update tenant tenant freezer config", K(tmp_ret), K(tenant_id)); } @@ -1320,6 +1324,24 @@ int ObMultiTenant::update_tenant_dag_scheduler_config() return ret; } +int ObMultiTenant::update_tenant_ddl_config() +{ + int ret = OB_SUCCESS; +#ifdef ERRSIM + const uint64_t tenant_id = MTL_ID(); // only the ERRSIM build reads the tenant config, so only it builds the guard + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id)); + if (tenant_config.is_valid()) { + if (OB_FAIL(ObDDLSimPointMgr::get_instance().set_tenant_param(tenant_id, + tenant_config->errsim_ddl_sim_point_random_control, + tenant_config->errsim_ddl_sim_point_fixed_list))) { + LOG_WARN("set tenant param for ddl sim point failed", K(ret), + K(tenant_id), K(tenant_config->errsim_ddl_sim_point_random_control), K(tenant_config->errsim_ddl_sim_point_fixed_list)); + } + } +#endif + return ret; +} + int ObMultiTenant::update_tenant_freezer_config_() { int ret = OB_SUCCESS; diff --git a/src/observer/omt/ob_multi_tenant.h b/src/observer/omt/ob_multi_tenant.h index fb612f4f8a..4b7d9c01d8 100644 --- a/src/observer/omt/ob_multi_tenant.h +++ b/src/observer/omt/ob_multi_tenant.h @@ -117,6 +117,7 @@ public: int update_tenant_config(uint64_t tenant_id); int update_palf_config(); int update_tenant_dag_scheduler_config(); + int update_tenant_ddl_config(); int get_tenant(const uint64_t tenant_id, ObTenant *&tenant) const; int get_tenant_with_tenant_lock(const uint64_t tenant_id, common::ObLDHandle &handle, ObTenant *&tenant) const; int get_active_tenant_with_tenant_lock(const uint64_t tenant_id, common::ObLDHandle &handle, ObTenant *&tenant) const; diff --git a/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.cpp b/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.cpp new file mode 100644 index 0000000000..e11634bd7f --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.cpp @@ -0,0 +1,166 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + + +#define USING_LOG_PREFIX SERVER +#include "observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.h" +#include "share/ob_ddl_sim_point.h" + +namespace oceanbase +{ +using namespace common; +using namespace share; +namespace observer +{ + + +int ObAllVirtualDDLSimPoint::inner_get_next_row(common::ObNewRow *&row) +{ +#ifdef ERRSIM + int ret = OB_SUCCESS; + ObObj *cells = cur_row_.cells_; + ObDDLSimPoint sim_point; + while (OB_SUCC(ret)) { + if (point_idx_ >= MAX_DDL_SIM_POINT_ID) { + ret = OB_ITER_END; + } else if (OB_FAIL(ObDDLSimPointMgr::get_instance().get_sim_point(point_idx_++, sim_point))) { + LOG_WARN("get ddl sim point failed", K(ret)); + } else if (sim_point.is_valid()) { + break; + } + } + if (OB_SUCC(ret) && sim_point.is_valid()) { + for (int64_t i = 0; OB_SUCC(ret) && i < output_column_ids_.count(); ++i) { + const uint64_t column_id = output_column_ids_.at(i); + switch (column_id) { + case SIM_POINT_ID: { + cells[i].set_int(sim_point.id_); + break; + } + case SIM_POINT_NAME: { + cells[i].set_varchar(sim_point.name_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case SIM_POINT_DESC: { + cells[i].set_varchar(sim_point.desc_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case SIM_POINT_ACTION: { + memset(action_str_, 0, sizeof(action_str_)); + sim_point.action_->to_string(action_str_, sizeof(action_str_)); + action_str_[sizeof(action_str_) - 1] = 0; + 
cells[i].set_varchar(action_str_); + cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + } + return ret; +#else + return OB_ITER_END; +#endif +} + +int ObAllVirtualDDLSimPointStat::init(const common::ObAddr &addr) +{ +#ifdef ERRSIM + int ret = OB_SUCCESS; + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY((!addr.is_valid()))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(addr)); + } else { + addr_ = addr; + MEMSET(ip_buf_, 0, sizeof(ip_buf_)); + if (!addr_.ip_to_string(ip_buf_, sizeof(ip_buf_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ip to string failed", K(ret), K(addr_)); + } else if (OB_FAIL(ObDDLSimPointMgr::get_instance().get_sim_stat(task_sim_points_, sim_counts_))) { + LOG_WARN("get ddl sim stat failed", K(ret)); + } else if (OB_UNLIKELY(task_sim_points_.count() != sim_counts_.count())) { + ret = OB_ERR_SYS; + LOG_WARN("the stat count not match", K(ret), K(task_sim_points_.count()), K(sim_counts_.count())); + } else { + idx_ = 0; + is_inited_ = true; + } + } + return ret; +#else + return OB_SUCCESS; +#endif +} + +int ObAllVirtualDDLSimPointStat::inner_get_next_row(common::ObNewRow *&row) +{ +#ifdef ERRSIM + int ret = OB_SUCCESS; + ObObj *cells = cur_row_.cells_; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret)); + } else if (idx_ >= task_sim_points_.count()) { + ret = OB_ITER_END; + } else { + const ObDDLSimPointMgr::TaskSimPoint &task_sim_point = task_sim_points_.at(idx_); + const int64_t sim_count = sim_counts_.at(idx_); + for (int64_t i = 0; OB_SUCC(ret) && i < output_column_ids_.count(); ++i) { + const uint64_t column_id = output_column_ids_.at(i); + switch (column_id) { + case SVR_IP: { + cells[i].set_varchar(ip_buf_); + 
cells[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } + case SVR_PORT: { + cells[i].set_int(addr_.get_port()); + break; + } + case TENANT_ID: { + cells[i].set_int(task_sim_point.tenant_id_); + break; + } + case DDL_TASK_ID: { + cells[i].set_int(task_sim_point.task_id_); + break; + } + case SIM_POINT_ID: { + cells[i].set_int(task_sim_point.point_id_); + break; + } + case TRIGGER_COUNT: { + cells[i].set_int(sim_count); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + ++idx_; + } + } + return ret; +#else + return OB_ITER_END; +#endif +} + +} +} diff --git a/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.h b/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.h new file mode 100644 index 0000000000..cca0364f13 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.h @@ -0,0 +1,77 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_H_ +#define OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_H_ + +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/ob_ddl_sim_point.h" + +namespace oceanbase +{ +namespace observer +{ + +class ObAllVirtualDDLSimPoint : public common::ObVirtualTableScannerIterator +{ +public: + ObAllVirtualDDLSimPoint() : point_idx_(0) {} + virtual ~ObAllVirtualDDLSimPoint() {} + virtual int inner_get_next_row(common::ObNewRow *&row); +private: + enum DDLSimPointColumn + { + SIM_POINT_ID = common::OB_APP_MIN_COLUMN_ID, + SIM_POINT_NAME, + SIM_POINT_DESC, + SIM_POINT_ACTION, + }; + +private: + int64_t point_idx_; + char action_str_[1024]; + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualDDLSimPoint); +}; + +class ObAllVirtualDDLSimPointStat : public common::ObVirtualTableScannerIterator +{ +public: + ObAllVirtualDDLSimPointStat() : is_inited_(false), idx_(0) {} + virtual ~ObAllVirtualDDLSimPointStat() {} + int init(const common::ObAddr &addr); + virtual int inner_get_next_row(common::ObNewRow *&row); +private: + enum DDLSimPointStatColumn + { + SVR_IP = common::OB_APP_MIN_COLUMN_ID, + SVR_PORT, + TENANT_ID, + DDL_TASK_ID, + SIM_POINT_ID, + TRIGGER_COUNT, + }; + +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualDDLSimPointStat); + bool is_inited_; + common::ObAddr addr_; + char ip_buf_[common::MAX_IP_ADDR_LENGTH]; + int64_t idx_; + ObArray<share::ObDDLSimPointMgr::TaskSimPoint> task_sim_points_; // snapshot filled by init(); rows are read by index idx_ + ObArray<int64_t> sim_counts_; // trigger counts, same length and order as task_sim_points_ +}; + +} // namespace observer +} // namespace oceanbase + + +#endif diff --git a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp index 554ca4ced8..3f96cf6d44 100644 --- a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp +++ b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp @@ -153,6 +153,7 @@ #include "observer/virtual_table/ob_all_virtual_tablet_compaction_history.h" #include "observer/virtual_table/ob_all_virtual_tablet_compaction_info.h" 
#include "observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h" +#include "observer/virtual_table/ob_all_virtual_ddl_sim_point_stat.h" #include "observer/virtual_table/ob_all_virtual_tablet_pointer_status.h" #include "observer/virtual_table/ob_all_virtual_storage_meta_memory_status.h" #include "sql/session/ob_sql_session_info.h" @@ -2334,6 +2335,24 @@ int ObVTIterCreator::create_vt_iter(ObVTableScanParam &params, } break; } + case OB_ALL_VIRTUAL_DDL_SIM_POINT_TID: { + ObAllVirtualDDLSimPoint *ddl_sim_point = nullptr; + if (OB_SUCC(NEW_VIRTUAL_TABLE(ObAllVirtualDDLSimPoint, ddl_sim_point))) { + vt_iter = static_cast<ObVirtualTableIterator *>(ddl_sim_point); + } + break; + } + case OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TID: { + ObAllVirtualDDLSimPointStat *ddl_sim_point_stat = nullptr; + if (OB_SUCC(NEW_VIRTUAL_TABLE(ObAllVirtualDDLSimPointStat, ddl_sim_point_stat))) { + if (OB_FAIL(ddl_sim_point_stat->init(addr_))) { + SERVER_LOG(WARN, "fail to init ddl sim point stat iterator, ", K(ret)); + } else { + vt_iter = static_cast<ObVirtualTableIterator *>(ddl_sim_point_stat); // published only after init() succeeded + } + } + break; + } case OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID: { ObAllVirtualTabletCompactionInfo *info_mgr = NULL; if (OB_SUCC(NEW_VIRTUAL_TABLE(ObAllVirtualTabletCompactionInfo, info_mgr))) { diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp index fc19a54bd8..099a03b06f 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.cpp @@ -17,6 +17,7 @@ #include "share/ob_ddl_error_message_table_operator.h" #include "share/ob_autoincrement_service.h" #include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/ddl_task/ob_ddl_scheduler.h" #include "rootserver/ob_root_service.h" #include "rootserver/ddl_task/ob_ddl_redefinition_task.h" @@ -40,7 +41,7 @@ ObColumnRedefinitionTask::~ObColumnRedefinitionTask() int ObColumnRedefinitionTask::init(const uint64_t tenant_id, const 
int64_t task_id, const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, - const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; uint64_t tenant_data_format_version = 0; @@ -70,6 +71,7 @@ int ObColumnRedefinitionTask::init(const uint64_t tenant_id, const int64_t task_ task_version_ = OB_COLUMN_REDEFINITION_TASK_VERSION; task_id_ = task_id; parallelism_ = parallelism; + sub_task_trace_id_ = sub_task_trace_id; consumer_group_id_ = consumer_group_id; execution_id_ = 1L; start_time_ = ObTimeUtility::current_time(); @@ -159,6 +161,8 @@ int ObColumnRedefinitionTask::update_complete_sstable_job_status(const common::O if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObColumnRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_COMPLETE_SSTABLE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (ObDDLTaskStatus::REDEFINITION != task_status_) { // by pass, may be network delay } else if (snapshot_version != snapshot_version_) { @@ -187,6 +191,8 @@ int ObColumnRedefinitionTask::copy_table_indexes() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_INDEX_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { const int64_t MAX_ACTIVE_TASK_CNT = 1; int64_t active_task_cnt = 0; @@ -246,6 +252,9 @@ int ObColumnRedefinitionTask::copy_table_indexes() ObDDLTaskRecord task_record; bool need_rebuild_index = true; SMART_VAR(obrpc::ObCreateIndexArg, create_index_arg) { + ObTraceIdGuard 
trace_id_guard(get_trace_id()); + ATOMIC_INC(&sub_task_trace_id_); + ObDDLEventInfo ddl_event_info(sub_task_trace_id_); // this create index arg is not valid, only has nls format create_index_arg.nls_date_format_ = alter_table_arg_.nls_formats_[0]; create_index_arg.nls_timestamp_format_ = alter_table_arg_.nls_formats_[1]; @@ -274,6 +283,7 @@ int ObColumnRedefinitionTask::copy_table_indexes() &allocator_, &create_index_arg, task_id_); + param.sub_task_trace_id_ = sub_task_trace_id_; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { @@ -288,7 +298,10 @@ int ObColumnRedefinitionTask::copy_table_indexes() LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); } } - if (OB_SUCC(ret) && need_rebuild_index) { + if (OB_FAIL(ret)) { + add_event_info("create column redefinition index fail"); + LOG_WARN("add build index task failed", K(ret), K(ddl_event_info), K(task_record)); + } else if (need_rebuild_index) { TCWLockGuard guard(lock_); const uint64_t task_key = index_ids.at(i); DependTaskStatus status; @@ -300,7 +313,8 @@ int ObColumnRedefinitionTask::copy_table_indexes() LOG_WARN("set dependent task map failed", K(ret), K(task_key)); } } - LOG_INFO("add build index task", K(ret), K(task_key), K(status)); + add_event_info("create column redefinition index succ"); + LOG_INFO("add build index task", K(ret), K(task_key), K(status), K(ddl_event_info)); } } } @@ -327,6 +341,8 @@ int ObColumnRedefinitionTask::copy_table_constraints() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_CONSTRAINT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { if (has_rebuild_constraint_) { // do nothing @@ -379,6 +395,8 @@ int ObColumnRedefinitionTask::copy_table_foreign_keys() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root 
service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_FOREIGN_KEY_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { if (has_rebuild_foreign_key_) { // do nothing @@ -463,6 +481,8 @@ int ObColumnRedefinitionTask::copy_table_dependent_objects(const ObDDLTaskStatus } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_DEPENDENT_OBJECTS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (!dependent_task_result_map_.created() && OB_FAIL(dependent_task_result_map_.create(MAX_DEPEND_OBJECT_COUNT, lib::ObLabel("DepTasMap")))) { LOG_WARN("create dependent task map failed", K(ret)); } else { @@ -547,6 +567,8 @@ int ObColumnRedefinitionTask::take_effect(const ObDDLTaskStatus next_task_status } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_TAKE_EFFECT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) { LOG_WARN("get tenant schema guard failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, target_object_id_, table_schema))) { @@ -766,6 +788,8 @@ int ObColumnRedefinitionTask::collect_longops_stat(ObLongopsValue &value) break; } if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_COLLECT_LONGOPS_STAT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(copy_longops_stat(value))) { LOG_WARN("failed to collect common longops stat", K(ret)); } diff --git a/src/rootserver/ddl_task/ob_column_redefinition_task.h b/src/rootserver/ddl_task/ob_column_redefinition_task.h 
index 0740700e7b..6593fd8727 100644 --- a/src/rootserver/ddl_task/ob_column_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_column_redefinition_task.h @@ -37,6 +37,7 @@ public: const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ddl_task/ob_constraint_task.cpp b/src/rootserver/ddl_task/ob_constraint_task.cpp index 4aa25c0102..8907ab5822 100755 --- a/src/rootserver/ddl_task/ob_constraint_task.cpp +++ b/src/rootserver/ddl_task/ob_constraint_task.cpp @@ -16,6 +16,7 @@ #include "share/schema/ob_schema_struct.h" #include "share/ob_ddl_error_message_table_operator.h" #include "share/ob_ddl_common.h" +#include "share/ob_ddl_sim_point.h" #include "storage/ddl/ob_ddl_lock.h" #include "rootserver/ob_root_service.h" #include "rootserver/ob_snapshot_info_manager.h" @@ -67,6 +68,8 @@ int ObCheckConstraintValidationTask::process() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, VALIDATE_CONSTRAINT_OR_FOREIGN_KEY_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (!check_table_empty_ && OB_ISNULL(constraint = table_schema->get_constraint(constraint_id_))) { ret = OB_ERR_CONTRAINT_NOT_FOUND; LOG_WARN("error unexpected, can not get constraint", K(ret)); @@ -153,6 +156,18 @@ int ObCheckConstraintValidationTask::process() if (OB_SUCCESS != (tmp_ret = root_service->get_ddl_scheduler().on_sstable_complement_job_reply(unused_tablet_id, task_key, 1L/*unused snapshot version*/, 1L/*unused execution id*/, ret, info))) { LOG_WARN("fail to finish check constraint task", K(ret), K(tmp_ret)); } + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), 
"data_table_id:%ld, target_object_id:%ld", + data_table_id_, target_object_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "check constraint validation task process finish", + K_(tenant_id), + "ret", ret, + K_(trace_id), + K_(task_id), + K_(constraint_id), + K_(schema_version), + table_id_buffer); + LOG_INFO("process check constraint validation task", "ddl_event_info", ObDDLEventInfo(), K(task_id_), K(constraint_id_)); return ret; } @@ -189,6 +204,8 @@ int ObForeignKeyConstraintValidationTask::process() if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, VALIDATE_CONSTRAINT_OR_FOREIGN_KEY_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { ObTabletID unused_tablet_id; ObDDLTaskKey task_key(tenant_id_, foregin_key_id_, schema_version_); @@ -200,8 +217,16 @@ int ObForeignKeyConstraintValidationTask::process() if (OB_SUCCESS != (tmp_ret = root_service->get_ddl_scheduler().on_sstable_complement_job_reply(unused_tablet_id, task_key, 1L/*unused snapshot version*/, 1L/*unused execution id*/, ret, info))) { LOG_WARN("fail to finish check constraint task", K(ret)); } - LOG_INFO("execute check foreign key task finish", K(ret), K(task_key), K(data_table_id_), K(foregin_key_id_)); + LOG_INFO("execute check foreign key task finish", K(ret), "ddl_event_info", ObDDLEventInfo(), K(task_key), K(data_table_id_), K(foregin_key_id_)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "foreign key constraint validation task process finish", + K_(tenant_id), + "ret", ret, + K_(trace_id), + K_(task_id), + K_(foregin_key_id), + K_(schema_version), + data_table_id_); return ret; } @@ -488,6 +513,7 @@ int ObConstraintTask::init( const int64_t schema_version, const ObAlterTableArg &alter_table_arg, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const int64_t parent_task_id, const int64_t status, const int64_t snapshot_version) 
@@ -522,6 +548,7 @@ int ObConstraintTask::init( task_id_ = task_id; parent_task_id_ = parent_task_id; consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_version_ = OB_CONSTRAINT_TASK_VERSION; is_table_hidden_ = table_schema->is_user_hidden_table(); dst_tenant_id_ = tenant_id_; @@ -598,6 +625,8 @@ int ObConstraintTask::hold_snapshot(const int64_t snapshot_version) } else if (OB_UNLIKELY(snapshot_version < 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_HOLD_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { LOG_WARN("failed to convert", K(snapshot_version), K(ret)); } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { @@ -635,6 +664,8 @@ int ObConstraintTask::release_snapshot(const int64_t snapshot_version) if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObConstraintTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_RELEASE_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { LOG_WARN("get tenant schema guard failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, table_schema))) { @@ -673,7 +704,7 @@ int ObConstraintTask::wait_trans_end() } if (OB_SUCC(ret) && new_status != CHECK_CONSTRAINT_VALID && !wait_trans_ctx_.is_inited()) { - if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, schema_version_))) { + if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, schema_version_))) { LOG_WARN("init wait trans ctx failed", K(ret)); } } @@ -1041,7 +1072,7 
@@ int ObConstraintTask::report_check_constraint_error_code() } } if (OB_SUCC(ret)) { - if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, tenant_id_, task_id_, object_id_, schema_version_, -1/*object id*/, GCTX.self_addr(), root_service->get_sql_proxy()))) { + if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, tenant_id_, trace_id_, task_id_, parent_task_id_, object_id_, schema_version_, -1/*object id*/, GCTX.self_addr(), root_service->get_sql_proxy()))) { LOG_WARN("report constraint ddl error message failed", K(ret)); } } @@ -1098,7 +1129,7 @@ int ObConstraintTask::report_foreign_key_constraint_error_code() } } if (OB_SUCC(ret)) { - if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, tenant_id_, task_id_, object_id_, schema_version_, -1/*object id*/, GCTX.self_addr(), root_service->get_sql_proxy()))) { + if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, tenant_id_, trace_id_, task_id_, parent_task_id_, object_id_, schema_version_, -1/*object id*/, GCTX.self_addr(), root_service->get_sql_proxy()))) { LOG_WARN("report constraint ddl error message failed", K(ret)); } } @@ -1115,7 +1146,9 @@ int ObConstraintTask::set_foreign_key_constraint_validated() obrpc::ObAlterTableRes res; ObArenaAllocator allocator(lib::ObLabel("ConstraiTask")); SMART_VAR(ObAlterTableArg, alter_table_arg) { - if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_SET_VALIDATED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("deep copy table arg failed", K(ret)); } else if (alter_table_arg.foreign_key_arg_list_.count() != 1) { ret = OB_ERR_UNEXPECTED; @@ -1192,6 +1225,8 @@ int ObConstraintTask::set_check_constraint_validated() if (OB_ISNULL(root_service)) { 
ret = OB_ERR_SYS; LOG_WARN("error sys, root serivce must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_SET_VALIDATED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("deep copy table arg failed", K(ret)); } else { @@ -1331,6 +1366,8 @@ int ObConstraintTask::set_new_not_null_column_validate() if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root serivce must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_SET_VALIDATED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("deep copy table arg failed", K(ret)); } else { @@ -1416,7 +1453,9 @@ int ObConstraintTask::rollback_failed_check_constraint() obrpc::ObAlterTableRes tmp_res; ObArenaAllocator allocator(lib::ObLabel("ConstraiTask")); SMART_VAR(ObAlterTableArg, alter_table_arg) { - if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_ROLL_BACK_SCHEMA))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("fail to deep copy table arg", K(ret)); } else { alter_table_arg.based_schema_object_infos_.reset(); @@ -1495,7 +1534,9 @@ int ObConstraintTask::rollback_failed_foregin_key() ObCreateForeignKeyArg &fk_arg = alter_table_arg_.foreign_key_arg_list_.at(0); SMART_VAR(ObAlterTableArg, alter_table_arg) { ObArenaAllocator allocator(lib::ObLabel("ConstraiTask")); - if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_ROLL_BACK_SCHEMA))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), 
K(task_id_)); + } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("deep copy table arg failed", K(ret)); } else if (FALSE_IT(alter_table_arg.based_schema_object_infos_.reset())) { } else if (!is_table_hidden_ && OB_FAIL(ObDDLUtil::refresh_alter_table_arg(tenant_id_, object_id_, target_object_id_, alter_table_arg))) { @@ -1581,7 +1622,9 @@ int ObConstraintTask::rollback_failed_add_not_null_columns() bool first_alter_clause = true; ObArenaAllocator allocator(lib::ObLabel("ConstraiTask")); SMART_VAR(ObAlterTableArg, alter_table_arg) { - if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_ROLL_BACK_SCHEMA))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(deep_copy_table_arg(allocator, alter_table_arg_, alter_table_arg))) { LOG_WARN("deep copy table arg failed", K(ret)); } else if (OB_ISNULL(buf = static_cast(allocator.alloc(buf_len)))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -1825,6 +1868,8 @@ int ObConstraintTask::set_constraint_validated() if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObConstraintTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CONSTRAINT_TASK_SET_VALIDATED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (task_type_ == ObDDLType::DDL_CHECK_CONSTRAINT) { if (OB_FAIL(set_check_constraint_validated())) { LOG_WARN("set check constraint validated failed", K(ret)); @@ -1887,6 +1932,10 @@ int ObConstraintTask::process() } ddl_tracing_.release_span_hierarchy(); } + if (OB_FAIL(ret)) { + add_event_info("constraint task process fail"); + LOG_INFO("process constraint task fail", "ddl_event_info", ObDDLEventInfo()); + } return ret; } diff --git a/src/rootserver/ddl_task/ob_constraint_task.h b/src/rootserver/ddl_task/ob_constraint_task.h index 3cd3a8164c..f06c5efa03 100644 --- 
a/src/rootserver/ddl_task/ob_constraint_task.h +++ b/src/rootserver/ddl_task/ob_constraint_task.h @@ -98,6 +98,7 @@ public: const int64_t schema_version, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const int64_t parent_task_id = 0, const int64_t status = share::ObDDLTaskStatus::WAIT_TRANS_END, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp index dad0708001..b3c116505d 100644 --- a/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.cpp @@ -21,6 +21,7 @@ #include "share/ob_ddl_error_message_table_operator.h" #include "share/ob_autoincrement_service.h" #include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_sim_point.h" #include "storage/tablelock/ob_table_lock_service.h" #include "storage/tablelock/ob_table_lock_rpc_client.h" #include "share/scn.h" @@ -67,6 +68,8 @@ int ObDDLRedefinitionSSTableBuildTask::init( if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_SSTABLE_BULD_TASK_INIT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(tz_info_wrap_.deep_copy(tz_info_wrap))) { LOG_WARN("fail to copy time zone info wrap", K(ret), K(tz_info_wrap)); } else if (OB_FAIL(col_name_map_.init(orig_table_schema, alter_table_schema))) { @@ -95,6 +98,8 @@ int ObDDLRedefinitionSSTableBuildTask::process() if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ddl redefinition sstable build task not inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, BUILD_REPLICA_ASYNC_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( tenant_id_, schema_guard))) { LOG_WARN("fail to get 
tenant schema guard", K(ret), K(data_table_id_)); @@ -148,21 +153,24 @@ int ObDDLRedefinitionSSTableBuildTask::process() const int64_t DDL_INNER_SQL_EXECUTE_TIMEOUT = ObDDLUtil::calc_inner_sql_execute_timeout(); if (inner_sql_exec_addr_.is_valid()) { sql_exec_addr = &inner_sql_exec_addr_; - LOG_INFO("inner sql execute addr" , K(*sql_exec_addr)); + LOG_INFO("inner sql execute addr" , K(*sql_exec_addr), "ddl_event_info", ObDDLEventInfo()); } if (oracle_mode) { user_sql_proxy = GCTX.ddl_oracle_sql_proxy_; } else { user_sql_proxy = GCTX.ddl_sql_proxy_; } + add_event_info(ret, "ddl redefinition sstable build task generate innersql"); LOG_INFO("execute sql" , K(sql_string), K(data_table_id_), K(tenant_id_), - "is_strict_mode", is_strict_mode(sql_mode_), K(sql_mode_), K(parallelism_), K(DDL_INNER_SQL_EXECUTE_TIMEOUT)); + "is_strict_mode", is_strict_mode(sql_mode_), K(sql_mode_), K(parallelism_), K(DDL_INNER_SQL_EXECUTE_TIMEOUT), "ddl_event_info", ObDDLEventInfo()); if (OB_FAIL(timeout_ctx.set_trx_timeout_us(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { LOG_WARN("set trx timeout failed", K(ret)); } else if (OB_FAIL(timeout_ctx.set_timeout(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { LOG_WARN("set timeout failed", K(ret)); } else { - if (OB_FAIL(user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_SSTABLE_BULD_TASK_PROCESS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, oracle_mode ? 
ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, &session_param, sql_exec_addr))) { LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); } else if (OB_FAIL(ObCheckTabletDataComplementOp::check_finish_report_checksum(tenant_id_, dest_table_id_, execution_id_, task_id_))) { @@ -173,11 +181,25 @@ int ObDDLRedefinitionSSTableBuildTask::process() } } if (OB_SUCCESS != (tmp_ret = root_service_->get_ddl_scheduler().on_sstable_complement_job_reply(unused_tablet_id, task_key, snapshot_version_, execution_id_, ret, info))) { - LOG_WARN("fail to finish sstable complement", K(ret)); + LOG_WARN("fail to finish sstable complement", K(ret), "ddl_event_info", ObDDLEventInfo()); } + add_event_info(ret, "ddl redefinition sstable build task finish"); return ret; } +void ObDDLRedefinitionSSTableBuildTask::add_event_info(const int ret, const ObString &ddl_event_stmt) +{ + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "object_id:%ld, target_object_id:%ld", data_table_id_, dest_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", ddl_event_stmt.ptr(), + K_(tenant_id), + "ret", ret, + K_(trace_id), + K_(task_id), + "table_id", table_id_buffer, + "sql_exec_addr", inner_sql_exec_addr_); +} + ObAsyncTask *ObDDLRedefinitionSSTableBuildTask::deep_copy(char *buf, const int64_t buf_size) const { int ret = OB_SUCCESS; @@ -249,7 +271,9 @@ int ObDDLRedefinitionTask::check_table_empty(const ObDDLTaskStatus next_task_sta } else if (OB_FAIL(check_need_check_table_empty(need_check_table_empty))) { LOG_WARN("failed to check need check table empty", K(ret)); } else if (need_check_table_empty) { - if (!is_check_replica_end && 0 == check_table_empty_job_time_) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_CHECK_TABLE_EMPTY_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (!is_check_replica_end && 0 == check_table_empty_job_time_) { ObCheckConstraintValidationTask task(dst_tenant_id_, 
object_id_, -1/*constraint id*/, target_object_id_, schema_version_, trace_id_, task_id_, true/*check_table_empty*/, obrpc::ObAlterTableArg::AlterConstraintType::ADD_CONSTRAINT); @@ -296,6 +320,8 @@ int ObDDLRedefinitionTask::hold_snapshot(const int64_t snapshot_version) } else if (OB_UNLIKELY(snapshot_version < 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(snapshot_version)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_HOLD_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { LOG_WARN("failed to convert", K(snapshot_version), K(ret)); } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { @@ -330,7 +356,8 @@ int ObDDLRedefinitionTask::hold_snapshot(const int64_t snapshot_version) LOG_WARN("batch acquire snapshot failed", K(ret), K(tablet_ids)); } } - LOG_INFO("hold snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_)); + add_event_info("hold snapshot finish"); + LOG_INFO("hold snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -349,6 +376,8 @@ int ObDDLRedefinitionTask::release_snapshot(const int64_t snapshot_version) } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_RELEASE_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(schema_service.get_tenant_schema_guard(tenant_id_, schema_guard))) { LOG_WARN("get tenant schema guard failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, data_table_schema))) { @@ -377,7 +406,8 @@ int ObDDLRedefinitionTask::release_snapshot(const int64_t snapshot_version) } else if 
(OB_FAIL(batch_release_snapshot(snapshot_version, tablet_ids))) { LOG_WARN("failed to release snapshot", K(ret)); } - LOG_INFO("release snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_)); + add_event_info("release snapshot finish"); + LOG_INFO("release snapshot finished", K(ret), K(snapshot_version), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -397,7 +427,7 @@ int ObDDLRedefinitionTask::obtain_snapshot(const ObDDLTaskStatus next_task_statu } else if (snapshot_version_ > 0 && snapshot_held_) { // do nothing, already hold snapshot. } else if (!wait_trans_ctx_.is_inited()) { - if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, object_id_, ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, schema_version_))) { + if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, schema_version_))) { LOG_WARN("fail to init wait trans ctx", K(ret)); } } @@ -449,6 +479,8 @@ int ObDDLRedefinitionTask::obtain_snapshot(const ObDDLTaskStatus next_task_statu LOG_WARN("fail to switch task status", K(ret)); } } + add_event_info("obtain snapshot finish"); + LOG_INFO("obtain snapshot", K(ret), K(snapshot_version_), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -505,7 +537,9 @@ int ObDDLRedefinitionTask::get_validate_checksum_columns_id(const ObTableSchema { int ret = OB_SUCCESS; bool is_oracle_mode = false; - if (OB_FAIL(alter_table_arg_.alter_table_schema_.check_if_oracle_compat_mode(is_oracle_mode))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_GET_CHECKSUM_COLUMNS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(alter_table_arg_.alter_table_schema_.check_if_oracle_compat_mode(is_oracle_mode))) { LOG_WARN("check if oracle mode failed", K(ret), K(object_id_), "dest_table_id", target_object_id_); } else { 
ObSQLMode sql_mode = alter_table_arg_.sql_mode_; @@ -612,7 +646,12 @@ int ObDDLRedefinitionTask::wait_data_complement(const ObDDLTaskStatus next_task_ DEBUG_SYNC(COLUMN_REDEFINITION_REPLICA_BUILD); if (is_build_replica_end) { ret = OB_SUCC(ret) ? complete_sstable_job_ret_code_ : ret; - if (OB_SUCC(ret) && OB_FAIL(check_data_dest_tables_columns_checksum(get_execution_id()))) { + bool need_verify_checksum = true; +#ifdef ERRSIM + // when the major compaction is delayed, skip verify column checksum + need_verify_checksum = 0 == GCONF.errsim_ddl_major_delay_time; +#endif + if (OB_SUCC(ret) && need_verify_checksum && OB_FAIL(check_data_dest_tables_columns_checksum(get_execution_id()))) { LOG_WARN("fail to check the columns checkum between data table and hidden one", K(ret)); } if (OB_FAIL(switch_status(next_task_status, true, ret))) { @@ -691,6 +730,8 @@ int ObDDLRedefinitionTask::check_data_dest_tables_columns_checksum(const int64_t if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_REDEF_TASK_CHECK_COLUMN_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObDDLUtil::get_tenant_schema_guard(tenant_id_, dst_tenant_id_, hold_buf_src_tenant_schema_guard, hold_buf_dst_tenant_schema_guard, src_tenant_schema_guard, dst_tenant_schema_guard))) { @@ -773,6 +814,9 @@ int ObDDLRedefinitionTask::add_constraint_ddl_task(const int64_t constraint_id) { int ret = OB_SUCCESS; SMART_VAR(obrpc::ObAlterTableArg, alter_table_arg) { + ObTraceIdGuard trace_id_guard(get_trace_id()); + ATOMIC_INC(&sub_task_trace_id_); + ObDDLEventInfo ddl_event_info(sub_task_trace_id_); ObSchemaGetterGuard schema_guard; const ObTableSchema *table_schema = nullptr; AlterTableSchema &alter_table_schema = alter_table_arg.alter_table_schema_; @@ -788,6 +832,8 @@ int ObDDLRedefinitionTask::add_constraint_ddl_task(const int64_t constraint_id) } else if 
(OB_UNLIKELY(OB_INVALID_ID == constraint_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(constraint_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, ADD_CONSTRAINT_DDL_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service->get_ddl_service().get_tenant_schema_guard_with_version_in_inner_table(dst_tenant_id_, schema_guard))) { LOG_WARN("get schema guard failed", K(ret), K(dst_tenant_id_)); } else if (OB_FAIL(schema_guard.get_table_schema(dst_tenant_id_, target_object_id_, table_schema))) { @@ -830,6 +876,7 @@ int ObDDLRedefinitionTask::add_constraint_ddl_task(const int64_t constraint_id) &allocator_, &alter_table_arg, task_id_); + param.sub_task_trace_id_ = sub_task_trace_id_; if (OB_FAIL(root_service->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { @@ -852,7 +899,8 @@ int ObDDLRedefinitionTask::add_constraint_ddl_task(const int64_t constraint_id) LOG_WARN("set dependent task map failed", K(ret), K(constraint_id)); } } - LOG_INFO("add constraint task", K(ret), K(constraint_id), K(status)); + LOG_INFO("add constraint task finish", K(ret), K(constraint_id), K(status), K(ddl_event_info)); + add_event_info("ddl redefinition task add constraint finish"); } } } @@ -871,6 +919,9 @@ int ObDDLRedefinitionTask::add_fk_ddl_task(const int64_t fk_id) const ObTableSchema *orig_table_schema = nullptr; const ObTableSchema *hidden_table_schema = nullptr; SMART_VAR(obrpc::ObAlterTableArg, alter_table_arg) { + ObTraceIdGuard trace_id_guard(get_trace_id()); + ATOMIC_INC(&sub_task_trace_id_); + ObDDLEventInfo ddl_event_info(sub_task_trace_id_); AlterTableSchema &alter_table_schema = alter_table_arg.alter_table_schema_; ObConstraint *constraint = nullptr; ObRootService *root_service = GCTX.root_service_; @@ -884,6 +935,8 @@ int ObDDLRedefinitionTask::add_fk_ddl_task(const int64_t fk_id) } else if (OB_UNLIKELY(OB_INVALID_ID == fk_id)) { ret = 
OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(fk_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, ADD_FOREIGN_KEY_DDL_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service->get_ddl_service().get_tenant_schema_guard_with_version_in_inner_table(tenant_id_, dst_tenant_id_, hold_buf_src_tenant_schema_guard, hold_buf_dst_tenant_schema_guard, src_tenant_schema_guard, dst_tenant_schema_guard))) { @@ -953,6 +1006,7 @@ int ObDDLRedefinitionTask::add_fk_ddl_task(const int64_t fk_id) &allocator_, &alter_table_arg, task_id_); + param.sub_task_trace_id_ = sub_task_trace_id_; if (OB_FAIL(alter_table_arg.foreign_key_arg_list_.push_back(fk_arg))) { LOG_WARN("push back foreign key arg failed", K(ret)); } else if (OB_FAIL(root_service->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { @@ -975,8 +1029,8 @@ int ObDDLRedefinitionTask::add_fk_ddl_task(const int64_t fk_id) LOG_WARN("set dependent task map failed", K(ret), K(fk_id)); } } - LOG_INFO("add fk task", K(ret), K(fk_id), K(status)); - + LOG_INFO("add fk task finish", K(ret), K(fk_arg), K(fk_id), K(status), K(ddl_event_info)); + add_event_info("ddl redefinition task add fk finish"); } } } @@ -996,6 +1050,8 @@ int ObDDLRedefinitionTask::on_child_task_finish( } else if (OB_UNLIKELY(common::OB_INVALID_ID == child_task_key)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(child_task_key)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, PROCESS_CHILD_TASK_FINISH_FAILED))) { + LOG_WARN("ddl sim failure: process child task finish failed", K(ret), K(tenant_id_), K(task_id_)); } else { TCWLockGuard guard(lock_); int64_t org_ret = INT64_MAX; @@ -1012,7 +1068,8 @@ int ObDDLRedefinitionTask::on_child_task_finish( } else if (OB_FAIL(dependent_task_result_map_.set_refactored(child_task_key, status, true/*overwrite*/))) { LOG_WARN("set dependent_task_result_map failed", K(ret), K(child_task_key)); } else 
{ - LOG_INFO("child task finish successfully", K(child_task_key)); + add_event_info("ddl redefinition task finish child task finish"); + LOG_INFO("child task finish", K(child_task_key), "ddl_event_info", ObDDLEventInfo()); } } return ret; @@ -1030,6 +1087,8 @@ int ObDDLRedefinitionTask::sync_auto_increment_position() if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, SYNC_AUTOINC_POSITION_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (has_synced_autoincrement_) { // do nothing } else if (OB_FAIL(ObDDLUtil::get_tenant_schema_guard(tenant_id_, dst_tenant_id_, @@ -1127,6 +1186,8 @@ int ObDDLRedefinitionTask::modify_autoinc(const ObDDLTaskStatus next_task_status } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, MODIFY_AUTOINC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(check_update_autoinc_end(is_update_autoinc_end))) { LOG_WARN("update autoinc failed", K(ret)); } else { @@ -1277,6 +1338,8 @@ int ObDDLRedefinitionTask::finish() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_FINISH_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (snapshot_version_ > 0 && OB_FAIL(release_snapshot(snapshot_version_))) { LOG_WARN("release snapshot failed", K(ret)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(tenant_id_, schema_guard))) { @@ -1384,6 +1447,8 @@ int ObDDLRedefinitionTask::check_health() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, 
REDEF_TASK_CHECK_HEALTH_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (!root_service->in_service()) { ret = OB_STATE_NOT_MATCH; LOG_WARN("root service not in service, do not need retry", K(ret), K(object_id_), K(target_object_id_)); @@ -1451,7 +1516,9 @@ int ObDDLRedefinitionTask::get_orig_all_index_tablet_count(ObSchemaGetterGuard & int ret = OB_SUCCESS; const ObTableSchema *orig_table_schema = nullptr; all_tablet_count = 0; - if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, orig_table_schema))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_GET_ALL_TABLET_COUNT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, object_id_, orig_table_schema))) { LOG_WARN("get table schema failed", K(ret), K(tenant_id_), K(object_id_)); } else if (OB_ISNULL(orig_table_schema)) { ret = OB_ERR_UNEXPECTED; @@ -1495,7 +1562,9 @@ int ObDDLRedefinitionTask::sync_stats_info() ObTimeoutCtx timeout_ctx; int64_t timeout = 0; const int64_t start_time = ObTimeUtility::current_time(); - if (OB_FAIL(ObDDLUtil::get_tenant_schema_guard(tenant_id_, dst_tenant_id_, + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_SYNC_STATS_INFO_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(ObDDLUtil::get_tenant_schema_guard(tenant_id_, dst_tenant_id_, hold_buf_src_tenant_schema_guard, hold_buf_dst_tenant_schema_guard, src_tenant_schema_guard, dst_tenant_schema_guard))) { LOG_WARN("get tenant schema guard failed", K(ret), K(tenant_id_), K(dst_tenant_id_)); @@ -2260,7 +2329,9 @@ int ObDDLRedefinitionTask::generate_sync_column_partition_level_stats_sql(const int ObDDLRedefinitionTask::sync_tablet_autoinc_seq() { int ret = OB_SUCCESS; - if (!sync_tablet_autoinc_seq_ctx_.is_inited() + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_SYNC_TABLET_AUTOINC_SEQ_FAILED))) { + LOG_WARN("ddl sim failure", 
K(ret), K(tenant_id_), K(task_id_)); + } else if (!sync_tablet_autoinc_seq_ctx_.is_inited() && OB_FAIL(sync_tablet_autoinc_seq_ctx_.init(tenant_id_/*src_tenant_id*/, dst_tenant_id_, object_id_, target_object_id_))) { LOG_WARN("failed to init sync tablet autoinc seq ctx", K(ret)); } else if (OB_FAIL(sync_tablet_autoinc_seq_ctx_.sync())) { @@ -2278,7 +2349,9 @@ int ObDDLRedefinitionTask::check_need_rebuild_constraint(const ObTableSchema &ta const int64_t CONSTRAINT_ID_BUCKET_NUM = 7; ObHashSet new_constraints_id_set; // newly added csts has already added into dest table schema at do_offline_ddl_in_trans stage. const AlterTableSchema &alter_table_schema = alter_table_arg_.alter_table_schema_; - if (OB_FAIL(new_constraints_id_set.create(CONSTRAINT_ID_BUCKET_NUM))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_CHECK_REBUILD_CONSTRAINT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(new_constraints_id_set.create(CONSTRAINT_ID_BUCKET_NUM))) { LOG_WARN("create alter constraint id set failed", K(ret)); } else if (obrpc::ObAlterTableArg::ADD_CONSTRAINT == alter_table_arg_.alter_constraint_type_ || obrpc::ObAlterTableArg::ALTER_CONSTRAINT_STATE == alter_table_arg_.alter_constraint_type_) { @@ -2689,6 +2762,8 @@ int ObDDLRedefinitionTask::reap_old_replica_build_task(bool &need_exec_new_inner if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObIndexBuildTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REAP_OLD_REPLICA_BUILD_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( tenant_id_, schema_guard))) { LOG_WARN("fail to get tenant schema guard", K(ret), K(data_table_id)); diff --git a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h index 61d57d7d6e..64c9309035 100644 --- 
a/src/rootserver/ddl_task/ob_ddl_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_ddl_redefinition_task.h @@ -49,6 +49,7 @@ public: virtual int process() override; virtual int64_t get_deep_copy_size() const override { return sizeof(*this); } virtual ObAsyncTask *deep_copy(char *buf, const int64_t buf_size) const override; + void add_event_info(const int ret, const ObString &ddl_event_stmt); private: bool is_inited_; uint64_t tenant_id_; diff --git a/src/rootserver/ddl_task/ob_ddl_retry_task.cpp b/src/rootserver/ddl_task/ob_ddl_retry_task.cpp index 1421288c7d..8900aa0fbe 100644 --- a/src/rootserver/ddl_task/ob_ddl_retry_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_retry_task.cpp @@ -16,6 +16,7 @@ #include "lib/rc/context.h" #include "share/schema/ob_multi_version_schema_service.h" #include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/ddl_task/ob_ddl_scheduler.h" #include "rootserver/ob_root_service.h" #include "rootserver/ddl_task/ob_ddl_task.h" @@ -160,6 +161,7 @@ int ObDDLRetryTask::init(const uint64_t tenant_id, const uint64_t object_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const share::ObDDLType &ddl_type, const obrpc::ObDDLArg *ddl_arg, const int64_t task_status) @@ -188,6 +190,7 @@ int ObDDLRetryTask::init(const uint64_t tenant_id, target_object_id_ = object_id; schema_version_ = schema_version; consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; tenant_id_ = tenant_id; task_id_ = task_id; task_type_ = ddl_type; @@ -208,6 +211,8 @@ int ObDDLRetryTask::init(const ObDDLTaskRecord &task_record) if (OB_UNLIKELY(!task_record.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(task_record)); + } else if (OB_FAIL(DDL_SIM(task_record.tenant_id_, task_record.task_id_, DDL_TASK_INIT_BY_RECORD_FAILED))) { + LOG_WARN("ddl sim failure", K(task_record.tenant_id_), K(task_record.task_id_)); } 
else if (OB_ISNULL(root_service_ = GCTX.root_service_)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service is null", K(ret)); @@ -310,6 +315,8 @@ int ObDDLRetryTask::check_schema_change_done() } else if (OB_ISNULL(root_service_)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, RETRY_TASK_CHECK_SCHEMA_CHANGED_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { common::ObMySQLProxy &proxy = root_service_->get_sql_proxy(); SMART_VAR(ObMySQLProxy::MySQLResult, res) { @@ -322,6 +329,8 @@ int ObDDLRetryTask::check_schema_change_done() " SELECT status FROM %s WHERE task_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, task_id_))) { LOG_WARN("assign query string failed", K(ret), KPC(this)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, RETRY_TASK_CHECK_SCHEMA_CHANGED_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(proxy.read(res, tenant_id_, query_string.ptr()))) { LOG_WARN("read record failed", K(ret), K(query_string)); } else if (OB_UNLIKELY(nullptr == (result = res.get_result()))) { @@ -352,6 +361,8 @@ int ObDDLRetryTask::drop_schema(const ObDDLTaskStatus next_task_status) } else if (OB_ISNULL(ddl_arg_) || lib::Worker::CompatMode::INVALID == compat_mode_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error", K(ret), KP(ddl_arg_), K(compat_mode_)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, RETRY_TASK_DROP_SCHEMA_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(check_schema_change_done())) { LOG_WARN("check task finished failed", K(ret)); } else if (is_schema_change_done_) { @@ -450,6 +461,8 @@ int ObDDLRetryTask::wait_alter_table(const ObDDLTaskStatus new_status) } else if (OB_ISNULL(ddl_arg_) || lib::Worker::CompatMode::INVALID == compat_mode_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error", K(ret), KP(ddl_arg_), K(compat_mode_)); + } 
else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, RETRY_TASK_WAIT_ALTER_TABLE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { switch (task_type_) { case ObDDLType::DDL_DROP_DATABASE: @@ -603,6 +616,10 @@ int ObDDLRetryTask::process() } } ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("ddl retry task process fail"); + LOG_INFO("ddl retry task process fail", K(ret), K(snapshot_version_), K(object_id_), K(target_object_id_), K(schema_version_), "ddl_event_info", ObDDLEventInfo()); + } } return ret; } @@ -702,6 +719,8 @@ int ObDDLRetryTask::update_task_status_wait_child_task_finish( if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, RETRY_TASK_UPDATE_BY_CHILD_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(ObDDLTaskRecordOperator::select_for_update(trans, tenant_id, task_id, curr_task_status, execution_id))) { LOG_WARN("select for update failed", K(ret), K(tenant_id), K(task_id)); } else if (OB_UNLIKELY(ObDDLTaskStatus::DROP_SCHEMA != curr_task_status)) { diff --git a/src/rootserver/ddl_task/ob_ddl_retry_task.h b/src/rootserver/ddl_task/ob_ddl_retry_task.h index b24ab0fe64..31dbc68746 100644 --- a/src/rootserver/ddl_task/ob_ddl_retry_task.h +++ b/src/rootserver/ddl_task/ob_ddl_retry_task.h @@ -33,6 +33,7 @@ public: const uint64_t object_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const share::ObDDLType &type, const obrpc::ObDDLArg *ddl_arg, const int64_t task_status = share::ObDDLTaskStatus::PREPARE); diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp index 5ad2373ea0..d5568a2458 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp @@ -31,6 
+31,7 @@ #include "share/ob_rpc_struct.h" #include "share/longops_mgr/ob_longops_mgr.h" #include "share/scheduler/ob_sys_task_stat.h" +#include "share/ob_ddl_sim_point.h" #include "share/restore/ob_import_util.h" namespace oceanbase @@ -94,6 +95,8 @@ int ObDDLTaskQueue::push_task(ObDDLTask *task) } else if (OB_ISNULL(task)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KP(task)); + } else if (OB_FAIL(DDL_SIM(task->get_tenant_id(), task->get_task_id(), PUSH_TASK_INTO_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure when push_task", K(ret), KPC(task)); } else if (!task_list_.add_last(task)) { ret = common::OB_ERR_UNEXPECTED; STORAGE_LOG(ERROR, "unexpected error, add build index task failed", K(ret)); @@ -160,6 +163,8 @@ int ObDDLTaskQueue::remove_task(ObDDLTask *task) } else if (OB_ISNULL(task)) { ret = common::OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(task)); + } else if (OB_FAIL(DDL_SIM(task->get_tenant_id(), task->get_task_id(), REMOVE_TASK_FROM_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure: remove_ddl_task failed", K(ret), K(task->get_ddl_task_id())); } else if (OB_FAIL(task_map_.erase_refactored(task->get_task_key()))) { LOG_WARN("fail to erase from task set", K(ret)); } else { @@ -225,6 +230,8 @@ int ObDDLTaskQueue::modify_task(const ObDDLTaskID &task_id, F &&op) } else if (OB_UNLIKELY(!task_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, GET_TASK_FROM_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure: get task from queue failed", K(ret), K(task_id)); } else if (OB_FAIL(task_id_map_.get_refactored(task_id, task))) { ret = OB_HASH_NOT_EXIST == ret ? 
OB_ENTRY_NOT_EXIST : ret; LOG_WARN("get from task map failed", K(ret), K(task_id)); @@ -278,6 +285,8 @@ int ObDDLTaskQueue::update_task_copy_deps_setting(const ObDDLTaskID &task_id, } else if (OB_UNLIKELY(!task_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, GET_TASK_FROM_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure: get task from queue failed", K(ret), K(task_id)); } else if (OB_FAIL(task_id_map_.get_refactored(task_id, task))) { ret = OB_HASH_NOT_EXIST == ret ? OB_ENTRY_NOT_EXIST : ret; LOG_WARN("get from task map failed", K(ret), K(task_id)); @@ -306,6 +315,8 @@ int ObDDLTaskQueue::update_task_process_schedulable(const ObDDLTaskID &task_id) } else if (OB_UNLIKELY(!task_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, GET_TASK_FROM_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure: get task from queue failed", K(ret), K(task_id)); } else if (OB_FAIL(task_id_map_.get_refactored(task_id, ddl_task))) { ret = OB_HASH_NOT_EXIST == ret ? OB_ENTRY_NOT_EXIST : ret; LOG_WARN("get from task map failed", K(ret), K(task_id)); @@ -359,6 +370,8 @@ int ObDDLTaskQueue::abort_task(const ObDDLTaskID &task_id) } else if (OB_UNLIKELY(!task_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, GET_TASK_FROM_QUEUE_FAILED))) { + LOG_WARN("ddl sim failure: get task from queue failed", K(ret), K(task_id)); } else if (OB_FAIL(task_id_map_.get_refactored(task_id, ddl_task))) { ret = OB_HASH_NOT_EXIST == ret ? 
OB_ENTRY_NOT_EXIST : ret; LOG_WARN("get from task map failed", K(ret), K(task_id)); @@ -373,6 +386,8 @@ int ObDDLTaskQueue::abort_task(const ObDDLTaskID &task_id) LOG_WARN("invalid arguments", K(ret), K(trace_id)); } else if (OB_FAIL(SYS_TASK_STATUS_MGR.cancel_task(trace_id))) { LOG_WARN("cancel task failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, CANCEL_SYS_TASK_FAILED))) { + LOG_WARN("ddl sim failure: cancel sys task failed", K(ret), K(task_id)); } else { LOG_INFO("succeed to abort task", K(task_id)); } @@ -417,7 +432,9 @@ int ObDDLTaskHeartBeatMananger::update_task_active_time(const ObDDLTaskID &task_ } else { ObBucketHashWLockGuard lock_guard(bucket_lock_, task_id.task_id_); // setting flag=1 to update the old time-value in the hash map with current time - if (OB_FAIL(register_task_time_.set_refactored(task_id, + if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, HEART_BEAT_UPDATE_ACTIVE_TIME))) { + LOG_WARN("ddl sim failed", K(ret), K(task_id)); + } else if (OB_FAIL(register_task_time_.set_refactored(task_id, ObTimeUtility::current_time(), 1, 0, 0))) { LOG_WARN("set register task time failed", K(ret), K(task_id)); } @@ -951,6 +968,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.parallelism_, param.parent_task_id_, param.consumer_group_id_, + param.sub_task_trace_id_, create_index_arg, *param.allocator_, task_record))) { @@ -964,6 +982,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.src_table_schema_, param.parent_task_id_, param.consumer_group_id_, + param.sub_task_trace_id_, drop_index_arg, *param.allocator_, task_record))) { @@ -985,6 +1004,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.parallelism_, param.consumer_group_id_, param.task_id_, + param.sub_task_trace_id_, static_cast(param.ddl_arg_), *param.allocator_, task_record))) { @@ -999,6 +1019,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param,
param.parallelism_, param.consumer_group_id_, param.task_id_, + param.sub_task_trace_id_, static_cast(param.ddl_arg_), *param.allocator_, task_record))) { @@ -1014,6 +1035,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.parallelism_, param.consumer_group_id_, param.task_id_, + param.sub_task_trace_id_, alter_table_arg, *param.allocator_, task_record))) { @@ -1031,6 +1053,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, static_cast(param.ddl_arg_), param.parent_task_id_, param.consumer_group_id_, + param.sub_task_trace_id_, *param.allocator_, task_record))) { LOG_WARN("fail to create constraint task failed", K(ret)); @@ -1046,6 +1069,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.parallelism_, param.consumer_group_id_, param.task_id_, + param.sub_task_trace_id_, static_cast(param.ddl_arg_), *param.allocator_, task_record))) { @@ -1059,6 +1083,7 @@ int ObDDLScheduler::create_ddl_task(const ObCreateDDLTaskParam &param, param.schema_version_, param.consumer_group_id_, param.task_id_, + param.sub_task_trace_id_, static_cast(param.ddl_arg_), *param.allocator_, task_record))) { @@ -1105,6 +1130,8 @@ int ObDDLScheduler::add_sys_task(ObDDLTask *task) } else { LOG_WARN("add task failed", K(ret)); } + } else if (OB_FAIL(DDL_SIM(task->get_tenant_id(), task->get_task_id(), DDL_SCHEDULER_ADD_SYS_TASK_FAILED))) { + LOG_WARN("ddl sim failure: add sys task failed", K(ret)); } else { task->set_sys_task_id(sys_task_status.task_id_); LOG_INFO("add sys task", K(sys_task_status.task_id_)); @@ -1126,6 +1153,8 @@ int ObDDLScheduler::remove_sys_task(ObDDLTask *task) if (!task_id.is_invalid()) { if (OB_FAIL(SYS_TASK_STATUS_MGR.del_task(task_id))) { LOG_WARN("del task failed", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task->get_tenant_id(), task->get_task_id(), DDL_SCHEDULER_REMOVE_SYS_TASK_FAILED))) { + LOG_WARN("ddl sim failure: remove ddl task", K(ret), K(task->get_ddl_task_id())); } else {
LOG_INFO("remove sys task", K(task_id)); } @@ -1230,6 +1259,8 @@ int ObDDLScheduler::modify_redef_task(const ObDDLTaskID &task_id, ObRedefCallbac if (OB_UNLIKELY(!task_id.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, REDEF_TABLE_ABORT_FAILED))) { + LOG_WARN("ddl sim failure: abort_redef_table", K(ret), K(task_id)); } else if (OB_FAIL(trans.start(&root_service_->get_sql_proxy(), task_id.tenant_id_))) { LOG_WARN("start transaction failed", K(ret), K_(task_id.tenant_id)); } else if (OB_FAIL(ObDDLTaskRecordOperator::select_for_update(trans, @@ -1315,6 +1346,8 @@ int ObDDLScheduler::copy_table_dependents(const ObDDLTaskID &task_id, } else if (OB_FAIL(infos.set_refactored("is_ignore_errors", is_ignore_errors))) { LOG_WARN("set item failed", K(ret)); } else if (FALSE_IT(cb.set_infos(&infos))) { + } else if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, REDEF_TABLE_COPY_DEPES_FAILED))) { + LOG_WARN("ddl sim failure: copy_table_dependents", K(ret), K(task_id)); } else if (OB_FAIL(modify_redef_task(task_id, cb))) { LOG_WARN("fail to modify redef task", K(ret), K(task_id)); } @@ -1325,7 +1358,9 @@ int ObDDLScheduler::finish_redef_table(const ObDDLTaskID &task_id) { int ret = OB_SUCCESS; ObFinishRedefCallback cb; - if (OB_FAIL(modify_redef_task(task_id, cb))) { + if (OB_FAIL(DDL_SIM(task_id.tenant_id_, task_id.task_id_, REDEF_TABLE_FINISH_FAILED))) { + LOG_WARN("ddl sim failure: finish_redef_table", K(ret), K(task_id)); + } else if (OB_FAIL(modify_redef_task(task_id, cb))) { LOG_WARN("fail to modify redef task", K(ret), K(task_id)); } return ret; @@ -1374,14 +1409,14 @@ int ObDDLScheduler::start_redef_table(const obrpc::ObStartRedefTableArg &arg, ob HEAP_VAR(obrpc::ObAlterTableArg, alter_table_arg) { ObPrepareAlterTableArgParam param; if (OB_FAIL(param.init(arg.session_id_, - arg.sql_mode_, - arg.ddl_stmt_str_, - orig_table_schema->get_table_name_str(), -
orig_database_schema->get_database_name_str(), - orig_database_schema->get_database_name_str(), - arg.tz_info_, - arg.tz_info_wrap_, - arg.nls_formats_))) { + arg.sql_mode_, + arg.ddl_stmt_str_, + orig_table_schema->get_table_name_str(), + orig_database_schema->get_database_name_str(), + orig_database_schema->get_database_name_str(), + arg.tz_info_, + arg.tz_info_wrap_, + arg.nls_formats_))) { LOG_WARN("param init failed", K(ret)); } else if (OB_FAIL(prepare_alter_table_arg(param, target_table_schema, alter_table_arg))) { LOG_WARN("failed to build alter table arg", K(ret)); @@ -1408,6 +1443,8 @@ int ObDDLScheduler::start_redef_table(const obrpc::ObStartRedefTableArg &arg, ob res.tenant_id_ = task_record.tenant_id_; res.schema_version_ = task_record.schema_version_; } + add_event_info(task_record, "ddl_scheduler start redef table"); + LOG_INFO("ddl_scheduler start redef table", K(ret), "ddl_event_info", ObDDLEventInfo(), K(task_record)); } } } @@ -1421,6 +1458,7 @@ int ObDDLScheduler::create_build_index_task( const int64_t parallelism, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg *create_index_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1443,6 +1481,7 @@ int ObDDLScheduler::create_build_index_task( index_schema->get_schema_version(), parallelism, consumer_group_id, + sub_task_trace_id, *create_index_arg, parent_task_id))) { LOG_WARN("init global index task failed", K(ret), K(data_table_schema), K(index_schema)); @@ -1451,8 +1490,8 @@ int ObDDLScheduler::create_build_index_task( } else if (OB_FAIL(insert_task_record(proxy, index_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - - LOG_INFO("ddl_scheduler create build index task finished", K(ret), K(index_task)); + index_task.add_event_info("create build index task finish"); + LOG_INFO("ddl_scheduler create build index task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), 
K(index_task)); } return ret; } @@ -1462,6 +1501,7 @@ int ObDDLScheduler::create_drop_index_task( const share::schema::ObTableSchema *index_schema, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObDropIndexArg *drop_index_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1490,6 +1530,7 @@ int ObDDLScheduler::create_drop_index_task( index_schema->get_schema_version(), parent_task_id, consumer_group_id, + sub_task_trace_id, *drop_index_arg))) { LOG_WARN("init drop index task failed", K(ret), K(data_table_id), K(index_table_id)); } else if (OB_FAIL(index_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { @@ -1498,9 +1539,11 @@ int ObDDLScheduler::create_drop_index_task( LOG_WARN("fail to insert task record", K(ret)); } } - LOG_INFO("ddl_scheduler create drop index task finished", K(ret), K(index_task)); + index_task.add_event_info("create drop index task finish"); + LOG_INFO("ddl_scheduler create drop index task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(index_task)); return ret; } + int ObDDLScheduler::create_constraint_task( common::ObISQLClient &proxy, const share::schema::ObTableSchema *table_schema, @@ -1510,6 +1553,7 @@ int ObDDLScheduler::create_constraint_task( const obrpc::ObAlterTableArg *arg, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, ObIAllocator &allocator, ObDDLTaskRecord &task_record) { @@ -1525,14 +1569,15 @@ int ObDDLScheduler::create_constraint_task( LOG_WARN("invalid argument", K(ret), K(table_schema), K(constraint_id), K(schema_version), K(arg)); } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), table_schema->get_tenant_id(), task_id))) { LOG_WARN("fetch new task id failed", K(ret)); - } else if (OB_FAIL(constraint_task.init(task_id, table_schema, constraint_id, ddl_type, schema_version, *arg, consumer_group_id, parent_task_id))) { + } else if 
(OB_FAIL(constraint_task.init(task_id, table_schema, constraint_id, ddl_type, schema_version, *arg, consumer_group_id, sub_task_trace_id, parent_task_id))) { LOG_WARN("init constraint task failed", K(ret), K(table_schema), K(constraint_id)); } else if (OB_FAIL(constraint_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); } else if (OB_FAIL(insert_task_record(proxy, constraint_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create constraint task finished", K(ret), K(constraint_task)); + constraint_task.add_event_info("create constraint task finish"); + LOG_INFO("ddl_scheduler create constraint task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(constraint_task)); } return ret; } @@ -1545,6 +1590,7 @@ int ObDDLScheduler::create_table_redefinition_task( const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1567,6 +1613,7 @@ int ObDDLScheduler::create_table_redefinition_task( dest_schema->get_schema_version(), parallelism, consumer_group_id, + sub_task_trace_id, *alter_table_arg))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { @@ -1574,7 +1621,8 @@ int ObDDLScheduler::create_table_redefinition_task( } else if (OB_FAIL(insert_task_record(proxy, redefinition_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create table redefinition task finished", K(ret), K(redefinition_task), K(common::lbt())); + redefinition_task.add_event_info("create table redefinition task finish"); + LOG_INFO("ddl_scheduler create table redefinition task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(redefinition_task), K(common::lbt())); } return ret; } @@
-1587,6 +1635,7 @@ int ObDDLScheduler::create_drop_primary_key_task( const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1607,6 +1656,7 @@ int ObDDLScheduler::create_drop_primary_key_task( dest_schema->get_schema_version(), parallelism, consumer_group_id, + sub_task_trace_id, *alter_table_arg))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(drop_pk_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { @@ -1614,7 +1664,8 @@ int ObDDLScheduler::create_drop_primary_key_task( } else if (OB_FAIL(insert_task_record(proxy, drop_pk_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create drop primary key task finished", K(ret), K(drop_pk_task)); + drop_pk_task.add_event_info("create drop primary key task finish"); + LOG_INFO("ddl_scheduler create drop primary key task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(drop_pk_task)); } return ret; } @@ -1627,6 +1678,7 @@ int ObDDLScheduler::create_column_redefinition_task( const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1647,6 +1699,7 @@ int ObDDLScheduler::create_column_redefinition_task( dest_schema->get_schema_version(), parallelism, consumer_group_id, + sub_task_trace_id, *alter_table_arg))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { @@ -1654,7 +1707,8 @@ int ObDDLScheduler::create_column_redefinition_task( } else if (OB_FAIL(insert_task_record(proxy, redefinition_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create column 
redefinition task finished", K(ret), K(redefinition_task)); + redefinition_task.add_event_info("create column redefinition task finish"); + LOG_INFO("ddl_scheduler create column redefinition task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(redefinition_task)); } return ret; } @@ -1666,6 +1720,7 @@ int ObDDLScheduler::create_modify_autoinc_task( const int64_t schema_version, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1679,14 +1734,15 @@ int ObDDLScheduler::create_modify_autoinc_task( || schema_version <= 0 || 0 == task_id || nullptr == arg || !arg->is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(table_id), K(schema_version), K(task_id), K(arg)); - } else if (OB_FAIL(modify_autoinc_task.init(tenant_id, task_id, table_id, schema_version, consumer_group_id, *arg))) { + } else if (OB_FAIL(modify_autoinc_task.init(tenant_id, task_id, table_id, schema_version, consumer_group_id, sub_task_trace_id, *arg))) { LOG_WARN("init global index task failed", K(ret), K(table_id), K(arg)); } else if (OB_FAIL(modify_autoinc_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); } else if (OB_FAIL(insert_task_record(proxy, modify_autoinc_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create modify autoinc task finished", K(ret), K(modify_autoinc_task)); + modify_autoinc_task.add_event_info("create modify autoinc task finish"); + LOG_INFO("ddl_scheduler create modify autoinc task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(modify_autoinc_task)); } return ret; } @@ -1699,6 +1755,7 @@ int ObDDLScheduler::create_ddl_retry_task( const uint64_t object_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const share::ObDDLType &type, 
const obrpc::ObDDLArg *arg, ObIAllocator &allocator, @@ -1716,14 +1773,15 @@ int ObDDLScheduler::create_ddl_retry_task( LOG_WARN("invalid argument", K(ret), K(tenant_id), K(object_id), K(schema_version), K(arg)); } else if (OB_FAIL(ObDDLTask::fetch_new_task_id(root_service_->get_sql_proxy(), tenant_id, task_id))) { LOG_WARN("fetch new task id failed", K(ret)); - } else if (OB_FAIL(ddl_retry_task.init(tenant_id, task_id, object_id, schema_version, consumer_group_id, type, arg))) { + } else if (OB_FAIL(ddl_retry_task.init(tenant_id, task_id, object_id, schema_version, consumer_group_id, sub_task_trace_id,type, arg))) { LOG_WARN("init ddl retry task failed", K(ret), K(arg)); } else if (OB_FAIL(ddl_retry_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { LOG_WARN("set trace id failed", K(ret)); } else if (OB_FAIL(insert_task_record(proxy, ddl_retry_task, allocator, task_record))) { LOG_WARN("fail to insert task record", K(ret)); } - LOG_INFO("ddl_scheduler create ddl retry task finished", K(ret), K(ddl_retry_task)); + ddl_retry_task.add_event_info("create ddl retry task finish"); + LOG_INFO("ddl_scheduler create ddl retry task finished", K(ret), "ddl_event_info", ObDDLEventInfo(), K(ddl_retry_task)); return ret; } @@ -1735,6 +1793,7 @@ int ObDDLScheduler::create_recover_restore_table_task( const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record) @@ -1757,6 +1816,7 @@ int ObDDLScheduler::create_recover_restore_table_task( dest_schema->get_schema_version(), parallelism, consumer_group_id, + sub_task_trace_id, *alter_table_arg))) { LOG_WARN("fail to init redefinition task", K(ret)); } else if (OB_FAIL(redefinition_task.set_trace_id(*ObCurTraceId::get_trace_id()))) { @@ -1791,6 +1851,9 @@ int ObDDLScheduler::insert_task_record( } else { LOG_WARN("insert ddl task record failed", K(ret), K(task_record)); } + } 
else if (OB_FAIL(DDL_SIM_WHEN(ddl_task.get_parent_task_id() > 0, + ddl_task.get_tenant_id(), ddl_task.get_task_id(), INSERT_CHILD_DDL_TASK_RECORD_EXIST))) { + LOG_WARN("sim ddl task record exist", K(ret), K(ddl_task)); } return ret; } @@ -1824,7 +1887,7 @@ int ObDDLScheduler::recover_task() } else if (OB_FAIL(ObAllTenantInfoProxy::get_primary_tenant_ids(&root_service_->get_sql_proxy(), primary_tenant_ids))) { LOG_WARN("get primary tenant id failed", K(ret)); } - LOG_INFO("start processing ddl recovery", K(task_records), K(primary_tenant_ids)); + LOG_INFO("start processing ddl recovery", "ddl_event_info", ObDDLEventInfo(), K(task_records), K(primary_tenant_ids)); for (int64_t i = 0; OB_SUCC(ret) && i < task_records.count(); ++i) { const ObDDLTaskRecord &cur_record = task_records.at(i); int64_t tenant_schema_version = 0; @@ -1862,11 +1925,28 @@ int ObDDLScheduler::recover_task() ret = (OB_SUCCESS == ret) ? tmp_ret : ret; } ret = OB_SUCCESS; // ignore ret + + add_event_info(cur_record, "recover ddl task"); + LOG_INFO("recover ddl task", K(ret), "ddl_event_info", ObDDLEventInfo(), K(cur_record)); } } return ret; } +void ObDDLScheduler::add_event_info(const ObDDLTaskRecord &ddl_record, const ObString &ddl_event_stmt) +{ + char object_id_buffer[256]; + snprintf(object_id_buffer, sizeof(object_id_buffer), "%ld %ld", ddl_record.object_id_, ddl_record.target_object_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", ddl_event_stmt.ptr(), + "tenant_id", ddl_record.tenant_id_, + "ret", ddl_record.ret_code_, + "trace_id", ddl_record.trace_id_, + "task_id", ddl_record.task_id_, + "task_status", ddl_record.task_status_, + "schema_version_", ddl_record.schema_version_, + object_id_buffer); +} + int ObDDLScheduler::remove_inactive_ddl_task() { int ret = OB_SUCCESS; @@ -1910,6 +1990,8 @@ int ObDDLScheduler::schedule_ddl_task(const ObDDLTaskRecord &record) if (OB_UNLIKELY(!record.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl task record is invalid", K(ret), K(record)); + } else 
if (OB_FAIL(DDL_SIM(record.tenant_id_, record.task_id_, SCHEDULE_DDL_TASK_FAILED))) { + LOG_WARN("sim schedule ddl task failed", K(ret)); } else { switch (record.ddl_type_) { case ObDDLType::DDL_CREATE_INDEX: @@ -1993,6 +2075,10 @@ int ObDDLScheduler::schedule_build_index_task( } } } + if (nullptr != build_index_task) { + build_index_task->add_event_info("schedule build index task finish"); + LOG_INFO("ddl_scheduler schedule build index task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(build_index_task)); + } if (OB_FAIL(ret) && nullptr != build_index_task) { build_index_task->~ObIndexBuildTask(); allocator_.free(build_index_task); @@ -2019,6 +2105,10 @@ int ObDDLScheduler::schedule_drop_primary_key_task(const ObDDLTaskRecord &task_r LOG_WARN("inner schedule task failed", K(ret), K(*drop_pk_task)); } } + if (nullptr != drop_pk_task) { + drop_pk_task->add_event_info("schedule drop primary key task finish"); + LOG_INFO("ddl_scheduler schedule drop primary key task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(drop_pk_task)); + } if (OB_FAIL(ret) && nullptr != drop_pk_task) { drop_pk_task->~ObDropPrimaryKeyTask(); allocator_.free(drop_pk_task); @@ -2048,6 +2138,10 @@ int ObDDLScheduler::schedule_table_redefinition_task(const ObDDLTaskRecord &task && OB_FAIL(manager_reg_heart_beat_task_.update_task_active_time(ObDDLTaskID(task_record.tenant_id_, task_record.task_id_)))) { LOG_WARN("register_task_time recover fail", K(ret)); } + if (nullptr != redefinition_task) { + redefinition_task->add_event_info("schedule table redefinition task finish"); + LOG_INFO("ddl_scheduler schedule table redefinition task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(redefinition_task)); + } if (OB_FAIL(ret) && nullptr != redefinition_task) { redefinition_task->~ObTableRedefinitionTask(); allocator_.free(redefinition_task); @@ -2074,6 +2168,10 @@ int ObDDLScheduler::schedule_column_redefinition_task(const ObDDLTaskRecord &tas LOG_WARN("inner schedule task failed", K(ret), 
K(*redefinition_task)); } } + if (nullptr != redefinition_task) { + redefinition_task->add_event_info("schedule column redefinition task finish"); + LOG_INFO("ddl_scheduler schedule column redefinition task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(redefinition_task)); + } if (OB_FAIL(ret) && nullptr != redefinition_task) { redefinition_task->~ObColumnRedefinitionTask(); allocator_.free(redefinition_task); @@ -2100,6 +2198,10 @@ int ObDDLScheduler::schedule_ddl_retry_task(const ObDDLTaskRecord &task_record) LOG_WARN("inner schedule task failed", K(ret)); } } + if (nullptr != ddl_retry_task) { + ddl_retry_task->add_event_info("schedule ddl retry task finish"); + LOG_INFO("ddl_scheduler schedule ddl retry task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(ddl_retry_task)); + } if (OB_FAIL(ret) && nullptr != ddl_retry_task) { ddl_retry_task->~ObDDLRetryTask(); allocator_.free(ddl_retry_task); @@ -2126,6 +2228,10 @@ int ObDDLScheduler::schedule_constraint_task(const ObDDLTaskRecord &task_record) LOG_WARN("inner schedule task failed", K(ret)); } } + if (nullptr != constraint_task) { + constraint_task->add_event_info("schedule constraint task finish"); + LOG_INFO("ddl_scheduler schedule constraint task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(constraint_task)); + } if (OB_FAIL(ret) && nullptr != constraint_task) { constraint_task->~ObConstraintTask(); allocator_.free(constraint_task); @@ -2152,6 +2258,10 @@ int ObDDLScheduler::schedule_modify_autoinc_task(const ObDDLTaskRecord &task_rec LOG_WARN("inner schedule task failed", K(ret)); } } + if (nullptr != modify_autoinc_task) { + modify_autoinc_task->add_event_info("schedule modify autoinc task finish"); + LOG_INFO("ddl_scheduler schedule modify autoinc task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(modify_autoinc_task)); + } if (OB_FAIL(ret) && nullptr != modify_autoinc_task) { modify_autoinc_task->~ObModifyAutoincTask(); allocator_.free(modify_autoinc_task); @@ -2178,6 +2288,10 @@ int 
ObDDLScheduler::schedule_drop_index_task(const ObDDLTaskRecord &task_record) LOG_WARN("inner schedule task failed", K(ret)); } } + if (nullptr != drop_index_task) { + drop_index_task->add_event_info("schedule drop index task finish"); + LOG_INFO("ddl_scheduler schedule drop index task", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(drop_index_task)); + } if (OB_FAIL(ret) && nullptr != drop_index_task) { drop_index_task->~ObDropIndexTask(); allocator_.free(drop_index_task); @@ -2290,6 +2404,8 @@ int ObDDLScheduler::inner_schedule_ddl_task(ObDDLTask *ddl_task, } else if (!is_started_) { ret = OB_NOT_RUNNING; LOG_WARN("ddl schedule is not start", K(ret)); + } else if (OB_FAIL(DDL_SIM(ddl_task->get_tenant_id(), ddl_task->get_task_id(), DDL_SCHEDULER_STOPPED))) { + LOG_WARN("ddl sim failure: ddl scheduler not running", K(ret), K(ddl_task->get_ddl_task_id())); } else { int tmp_ret = OB_SUCCESS; bool longops_added = true; @@ -2334,17 +2450,20 @@ int ObDDLScheduler::on_column_checksum_calc_reply( if (OB_UNLIKELY(ObDDLType::DDL_CREATE_INDEX != task.get_task_type())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ddl task type not global index", K(ret), K(task)); + } else if (OB_FAIL(DDL_SIM(task.get_tenant_id(), task.get_task_id(), ON_COLUMN_CHECKSUM_REPLY_FAILED))) { + LOG_WARN("sim column checksum reply failed", K(ret)); } else if (OB_FAIL(reinterpret_cast(&task)->update_column_checksum_calc_status(tablet_id, ret_code))) { LOG_WARN("update column checksum calc status failed", K(ret)); } + task.add_event_info("on column checksum calc reply"); /* must run before the return below to be reachable */ return ret; }))) { LOG_WARN("failed to modify task", K(ret)); } if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_NEED_RETRY; } - LOG_INFO("receive column checksum response", K(ret), K(tablet_id), K(task_key), K(ret_code)); + LOG_INFO("receive column checksum response", K(ret), "ddl_event_info", ObDDLEventInfo(), K(tablet_id), K(task_key), K(ret_code)); return ret; } @@ -2426,12 +2545,14 @@ int ObDDLScheduler::on_sstable_complement_job_reply( break;
} + task.add_event_info("on sstable complement job reply"); /* must run before the return below to be reachable */ return ret; }))) { LOG_WARN("failed to modify task", K(ret)); } if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_NEED_RETRY; } + LOG_INFO("ddl sstable complement job reply", K(ret), "ddl_event_info", ObDDLEventInfo(), K(tablet_id), K(task_key), K(ret_code)); return ret; } @@ -2453,10 +2574,12 @@ int ObDDLScheduler::on_ddl_task_finish( if (OB_FAIL(task_queue_.modify_task(parent_task_id, [&child_task_key, &ret_code](ObDDLTask &task) -> int { ObDDLRedefinitionTask *redefinition_task = static_cast(&task); + task.add_event_info("ddl task finish"); /* must run before the return below to be reachable */ return redefinition_task->on_child_task_finish(child_task_key.object_id_, ret_code); }))) { LOG_WARN("failed to modify task", K(ret)); } } + LOG_INFO("ddl task on finish", K(ret), K(ret_code), "ddl_event_info", ObDDLEventInfo(), K(parent_task_id), K(child_task_key), K(parent_task_trace_id)); return ret; } diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.h b/src/rootserver/ddl_task/ob_ddl_scheduler.h index 2e15a9dfdb..1011d1868c 100755 --- a/src/rootserver/ddl_task/ob_ddl_scheduler.h +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.h @@ -308,7 +308,11 @@ private: private: void runTimerTask() override; private: +#ifdef ERRSIM + static const int64_t DDL_TASK_SCAN_PERIOD = 1000L * 1000L; // 1s +#else static const int64_t DDL_TASK_SCAN_PERIOD = 60 * 1000L * 1000L; // 60s +#endif ObDDLScheduler &ddl_scheduler_; }; @@ -321,7 +325,11 @@ private: private: void runTimerTask() override; private: +#ifdef ERRSIM + static const int64_t DDL_TASK_CHECK_PERIOD = 1000L * 1000L; // 1s +#else static const int64_t DDL_TASK_CHECK_PERIOD = 30 * 1000L * 1000L; // 30s +#endif ObDDLScheduler &ddl_scheduler_; }; private: @@ -343,6 +351,7 @@ private: const int64_t parallelism, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg *create_index_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -355,6 +364,7 @@
private: const obrpc::ObAlterTableArg *arg, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -366,6 +376,7 @@ private: const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -378,6 +389,7 @@ private: const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -390,6 +402,7 @@ private: const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -401,6 +414,7 @@ private: const int64_t schema_version, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -410,6 +424,7 @@ private: const share::schema::ObTableSchema *index_schema, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObDropIndexArg *drop_index_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -420,6 +435,7 @@ private: const uint64_t object_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const share::ObDDLType &type, const obrpc::ObDDLArg *arg, ObIAllocator &allocator, @@ -433,6 +449,7 @@ private: const int64_t parallelism, const int64_t consumer_group_id, const int64_t task_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg *alter_table_arg, ObIAllocator &allocator, ObDDLTaskRecord &task_record); @@ -452,6 +469,7 @@ private: int 
add_task_to_longops_mgr(ObDDLTask *ddl_task); int remove_task_from_longops_mgr(ObDDLTask *ddl_task); int remove_ddl_task(ObDDLTask *ddl_task); + void add_event_info(const ObDDLTaskRecord &ddl_record, const ObString &ddl_event_stmt); private: static const int64_t TOTAL_LIMIT = 1024L * 1024L * 1024L; diff --git a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp index d7059e75ba..f9444aa2b8 100644 --- a/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp +++ b/src/rootserver/ddl_task/ob_ddl_single_replica_executor.cpp @@ -16,6 +16,7 @@ #include "rootserver/ob_root_service.h" #include "rootserver/ob_rs_async_rpc_proxy.h" #include "share/ob_ddl_common.h" +#include "share/ob_ddl_sim_point.h" #include "share/ob_srv_rpc_proxy.h" #include "share/location_cache/ob_location_service.h" @@ -30,6 +31,8 @@ int ObDDLSingleReplicaExecutor::build(const ObDDLSingleReplicaExecutorParam &par if (OB_UNLIKELY(!param.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(param)); + } else if (OB_FAIL(DDL_SIM(param.tenant_id_, param.task_id_, SINGLE_REPLICA_EXECUTOR_BUILD_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(param.tenant_id_), K(param.task_id_)); } else { ObSpinLockGuard guard(lock_); tenant_id_ = param.tenant_id_; @@ -79,11 +82,23 @@ int ObDDLSingleReplicaExecutor::build(const ObDDLSingleReplicaExecutorParam &par } } } + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "dest_table_id:%ld, source_table_id:%ld", dest_table_id_, source_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "build single replica", + "tenant_id",tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + K_(task_id), + "type", type_, + K_(schema_version), + table_id_buffer); if (OB_SUCC(ret)) { - LOG_INFO("start to schedule task", K(source_tablet_ids_.count()), K(dest_table_id_)); + LOG_INFO("start to schedule task", K(source_tablet_ids_.count()), 
K(dest_table_id_), "ddl_event_info", ObDDLEventInfo()); if (OB_FAIL(schedule_task())) { LOG_WARN("fail to schedule tasks", K(ret)); } + } else { + LOG_INFO("fail to build single replica task", K(ret), K(dest_table_id_), "ddl_event_info", ObDDLEventInfo()); } return ret; } @@ -96,6 +111,8 @@ int ObDDLSingleReplicaExecutor::schedule_task() if (OB_ISNULL(rpc_proxy) || OB_ISNULL(location_service)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KP(rpc_proxy), KP(location_service)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, SINGLE_REPLICA_EXECUTOR_SCHEDULE_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { ObDDLBuildSingleReplicaRequestProxy proxy(*rpc_proxy, &obrpc::ObSrvRpcProxy::build_ddl_single_replica_request); @@ -172,7 +189,18 @@ int ObDDLSingleReplicaExecutor::schedule_task() } else if (OB_FAIL(proxy.call(dest_leader_addr, rpc_timeout, dest_tenant_id_, arg))) { LOG_WARN("fail to send rpc", K(ret), K(rpc_timeout)); } else { - LOG_INFO("send build single replica request", K(arg), K(dest_leader_addr)); + LOG_INFO("send build single replica request", K(arg), K(dest_leader_addr), "ddl_event_info", ObDDLEventInfo()); + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "data_table_id:%ld, dest_table_id:%ld", + source_table_id_, dest_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "schedule single replica task", + "tenant_id", dest_tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + K_(task_id), + "table_id", table_id_buffer, + "dest_leader_addr", dest_leader_addr, + orig_leader_addr); } } if (OB_TMP_FAIL(proxy.wait_all(ret_array))) { diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index c34e3226e8..666db6a33e 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -41,6 +41,7 @@ #include "storage/ob_common_id_utils.h" #include "storage/tx/ob_ts_mgr.h" 
#include "observer/ob_server_struct.h" +#include "share/ob_ddl_sim_point.h" const bool OB_DDL_TASK_ENABLE_TRACING = false; @@ -131,13 +132,15 @@ ObDDLTaskSerializeField::ObDDLTaskSerializeField(const int64_t task_version, const int64_t parallelism, const int64_t data_format_version, const int64_t consumer_group_id, - const bool is_abort) + const bool is_abort, + const int32_t sub_task_trace_id) { task_version_ = task_version; parallelism_ = parallelism; data_format_version_ = data_format_version; consumer_group_id_ = consumer_group_id; is_abort_ = is_abort; + sub_task_trace_id_ = sub_task_trace_id; } void ObDDLTaskSerializeField::reset() @@ -147,6 +150,7 @@ void ObDDLTaskSerializeField::reset() data_format_version_ = 0; consumer_group_id_ = 0; is_abort_ = false; + sub_task_trace_id_ = 0; } OB_SERIALIZE_MEMBER(ObDDLTaskSerializeField, @@ -154,10 +158,11 @@ OB_SERIALIZE_MEMBER(ObDDLTaskSerializeField, parallelism_, data_format_version_, consumer_group_id_, - is_abort_); + is_abort_, + sub_task_trace_id_); ObCreateDDLTaskParam::ObCreateDDLTaskParam() - : tenant_id_(OB_INVALID_ID), object_id_(OB_INVALID_ID), schema_version_(0), parallelism_(0), consumer_group_id_(0), parent_task_id_(0), task_id_(0), + : sub_task_trace_id_(0), tenant_id_(OB_INVALID_ID), object_id_(OB_INVALID_ID), schema_version_(0), parallelism_(0), consumer_group_id_(0), parent_task_id_(0), task_id_(0), type_(DDL_INVALID), src_table_schema_(nullptr), dest_table_schema_(nullptr), ddl_arg_(nullptr), allocator_(nullptr) { } @@ -174,7 +179,7 @@ ObCreateDDLTaskParam::ObCreateDDLTaskParam(const uint64_t tenant_id, const obrpc::ObDDLArg *ddl_arg, const int64_t parent_task_id, const int64_t task_id) - : tenant_id_(tenant_id), object_id_(object_id), schema_version_(schema_version), parallelism_(parallelism), consumer_group_id_(consumer_group_id), + : sub_task_trace_id_(0), tenant_id_(tenant_id), object_id_(object_id), schema_version_(schema_version), parallelism_(parallelism), 
consumer_group_id_(consumer_group_id), parent_task_id_(parent_task_id), task_id_(task_id), type_(type), src_table_schema_(src_table_schema), dest_table_schema_(dest_table_schema), ddl_arg_(ddl_arg), allocator_(allocator) { @@ -785,6 +790,47 @@ int ObDDLTask::get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_st return ret; } +void ObDDLTask::add_event_info(const ObString &ddl_event_stmt) +{ + char object_id_buffer[256]; + const char *status_str = ddl_task_status_to_str(task_status_); + snprintf(object_id_buffer, sizeof(object_id_buffer), "object_id:%ld, target_object_id:%ld", object_id_, target_object_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", ddl_event_stmt.ptr(), + "tenant_id", tenant_id_, + "ret", ret_code_, + K_(trace_id), + K_(task_id), + "task_status", status_str, + K_(schema_version), + object_id_buffer); +} + +void ObDDLTask::add_event_info(const share::ObDDLTaskStatus status, const uint64_t tenant_id) +{ + const char *status_str = ddl_task_status_to_str(status); + char object_id_buffer[256]; + snprintf(object_id_buffer, sizeof(object_id_buffer), "object_id:%ld, target_object_id:%ld", object_id_, target_object_id_); + if (status_str) { + ROOTSERVICE_EVENT_ADD("ddl scheduler", "switch_state", + "tenant_id", tenant_id, + "ret", ret_code_, + "trace_id", *ObCurTraceId::get_trace_id(), + K_(task_id), + "object_id", object_id_buffer, + K_(snapshot_version), + status_str); + } else { + ROOTSERVICE_EVENT_ADD("ddl scheduler", "switch_state", + "tenant_id", tenant_id, + "ret", ret_code_, + "trace_id", *ObCurTraceId::get_trace_id(), + K_(task_id), + "object_id", object_id_buffer, + K_(snapshot_version), + status); + } +} + int ObDDLTask::deep_copy_table_arg(common::ObIAllocator &allocator, const ObDDLArg &source_arg, ObDDLArg &dest_arg) { int ret = OB_SUCCESS; @@ -846,10 +892,12 @@ int ObDDLTask::set_ddl_stmt_str(const ObString &ddl_stmt_str) int ObDDLTask::serialize_params_to_message(char *buf, const int64_t buf_size, int64_t &pos) const { int ret = 
OB_SUCCESS; - ObDDLTaskSerializeField serialize_field(task_version_, parallelism_, data_format_version_, consumer_group_id_, is_abort_); + ObDDLTaskSerializeField serialize_field(task_version_, parallelism_, data_format_version_, consumer_group_id_, is_abort_, sub_task_trace_id_); if (OB_UNLIKELY(nullptr == buf || buf_size <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KP(buf), K(buf_size)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_ENCODE_MESSAGE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(serialize_field.serialize(buf, buf_size, pos))) { LOG_WARN("serialize_field serialize failed", K(ret)); } @@ -861,7 +909,9 @@ int ObDDLTask::deserlize_params_from_message(const uint64_t tenant_id, const cha int ret = OB_SUCCESS; ObDDLTaskSerializeField serialize_field; serialize_field.reset(); - if (OB_FAIL(serialize_field.deserialize(buf, buf_size, pos))) { + if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_DECODE_MESSAGE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(serialize_field.deserialize(buf, buf_size, pos))) { LOG_WARN("serialize_field deserialize failed", K(ret)); } else { task_version_ = serialize_field.task_version_; @@ -869,13 +919,14 @@ int ObDDLTask::deserlize_params_from_message(const uint64_t tenant_id, const cha data_format_version_ = serialize_field.data_format_version_; consumer_group_id_ = serialize_field.consumer_group_id_; is_abort_ = serialize_field.is_abort_; + sub_task_trace_id_ = serialize_field.sub_task_trace_id_; } return ret; } int64_t ObDDLTask::get_serialize_param_size() const { - ObDDLTaskSerializeField serialize_field(task_version_, parallelism_, data_format_version_, consumer_group_id_, is_abort_); + ObDDLTaskSerializeField serialize_field(task_version_, parallelism_, data_format_version_, consumer_group_id_, is_abort_, sub_task_trace_id_); return serialize_field.get_serialize_size(); } @@ 
-979,6 +1030,8 @@ int ObDDLTask::switch_status(const ObDDLTaskStatus new_status, const bool enable if (OB_ISNULL(root_service = GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(dst_tenant_id_, task_id_, CHECK_TENANT_STANDBY_FAILED))) { + LOG_WARN("ddl sim failure: check tenant standby failed", K(ret), K(dst_tenant_id_), K(task_id_)); } else if (OB_FAIL(ObDDLUtil::check_tenant_status_normal(&root_service->get_sql_proxy(), dst_tenant_id_))) { if (OB_TENANT_HAS_BEEN_DROPPED == ret || OB_STANDBY_READ_ONLY == ret) { need_retry_ = false; @@ -1026,15 +1079,9 @@ int ObDDLTask::switch_status(const ObDDLTaskStatus new_status, const bool enable ret = (OB_SUCCESS == ret) ? tmp_ret : ret; } if (OB_SUCC(ret) && old_status != real_new_status) { - const char *status_str = ddl_task_status_to_str(real_new_status); - if (status_str) { - ROOTSERVICE_EVENT_ADD("ddl_scheduler", "switch_state", "tenant_id", dst_tenant_id_, K_(task_id), K_(object_id), K_(target_object_id), - "new_state", status_str, K_(snapshot_version), ret_code_); - } else { - ROOTSERVICE_EVENT_ADD("ddl_scheduler", "switch_state", "tenant_id", dst_tenant_id_, K_(task_id), K_(object_id), K_(target_object_id), - "new_state", real_new_status, K_(snapshot_version), ret_code_); - } + add_event_info(real_new_status, dst_tenant_id_); task_status_ = real_new_status; + LOG_INFO("ddl_scheduler switch status", K(ret), "ddl_event_info", ObDDLEventInfo(), K(task_status_)); } if (OB_CANCELED == real_ret_code) { @@ -1061,6 +1108,8 @@ int ObDDLTask::refresh_schema_version() if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REFRESH_SCHEMA_VERSION_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObDDLUtil::check_schema_version_refreshed(tenant_id_, schema_version_))) { if (OB_SCHEMA_EAGAIN 
!= ret) { LOG_WARN("check schema version refreshed failed", K(ret), K_(tenant_id), K_(schema_version)); @@ -1102,6 +1151,8 @@ int ObDDLTask::report_error_code(const ObString &forward_user_message, const int if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObIndexBuildTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(dst_tenant_id_, task_id_, REPORT_DDL_RET_CODE_FAILED))) { + LOG_WARN("ddl sim failure: report ddl ret code failed", K(ret), K(dst_tenant_id_), K(task_id_)); } else if (OB_FAIL(ObCompatModeGetter::check_is_oracle_mode_with_table_id(dst_tenant_id_, object_id_, is_oracle_mode))) { LOG_WARN("check if oracle mode failed", K(ret), K(object_id_)); } else { @@ -1163,7 +1214,7 @@ int ObDDLTask::report_error_code(const ObString &forward_user_message, const int } if (OB_SUCC(ret)) { - if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, dst_tenant_id_, task_id_, + if (OB_FAIL(ObDDLErrorMessageTableOperator::report_ddl_error_message(error_message, dst_tenant_id_, trace_id_, task_id_, parent_task_id_, target_object_id_, schema_version_, -1/*object id*/, GCTX.self_addr(), GCTX.root_service_->get_sql_proxy()))) { LOG_WARN("report ddl error message failed", K(ret)); } @@ -1215,7 +1266,7 @@ int ObDDLTask::wait_trans_end( } if (OB_SUCC(ret) && new_status != next_task_status && !wait_trans_ctx.is_inited()) { - if (OB_FAIL(wait_trans_ctx.init(tenant_id_, object_id_, + if (OB_FAIL(wait_trans_ctx.init(tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, data_table_schema->get_schema_version()))) { LOG_WARN("fail to init wait trans ctx", K(ret)); } @@ -1262,6 +1313,8 @@ int ObDDLTask::batch_release_snapshot( if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, BATCH_RELEASE_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure: remove snapshot failed", K(ret), K(tenant_id_), K(task_id_)); } 
else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot_version))) { LOG_WARN("failed to convert scn", K(snapshot_scn), K(ret)); } else if (OB_FAIL(ObDDLUtil::get_ddl_tx_timeout(tablet_ids.count(), timeout))) { @@ -1424,7 +1477,7 @@ bool ObDDLTask::is_replica_build_need_retry( } } else { // ret_code is not in some predefined error code list. - need_retry = false; + need_retry = is_error_need_retry(ret_code); } need_retry = OB_TABLE_NOT_EXIST == ret ? false : need_retry; return need_retry; @@ -1481,6 +1534,8 @@ int ObDDLTask::gather_scanned_rows( "AND TRACE_ID='%s' AND PLAN_OPERATION='PHY_SUBPLAN_SCAN' AND OTHERSTAT_5_VALUE='%ld'", OB_ALL_VIRTUAL_SQL_PLAN_MONITOR_TNAME, tenant_id, trace_id_str, task_id))) { LOG_WARN("failed to assign sql", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, QUERY_SQL_PLAN_MONITOR_SLOW))) { + LOG_WARN("ddl sim failure: gather scan rows slow", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(scan_res, tenant_id, scan_sql.ptr()))) { LOG_WARN("fail to execute sql", K(ret)); } else if (OB_ISNULL(scan_result = scan_res.get_result())) { @@ -1529,6 +1584,8 @@ int ObDDLTask::gather_sorted_rows( "AND TRACE_ID='%s' AND PLAN_OPERATION='PHY_SORT' AND OTHERSTAT_5_VALUE='%ld'", OB_ALL_VIRTUAL_SQL_PLAN_MONITOR_TNAME, tenant_id, trace_id_str, task_id))) { LOG_WARN("failed to assign sql", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, QUERY_SQL_PLAN_MONITOR_SLOW))) { + LOG_WARN("ddl sim failure: gather sorted rows slow", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(sort_res, tenant_id, sort_sql.ptr()))) { LOG_WARN("fail to execute sql", K(ret)); } else if (OB_ISNULL(sort_result = sort_res.get_result())) { @@ -1576,6 +1633,8 @@ int ObDDLTask::gather_inserted_rows( "AND TRACE_ID='%s' AND PLAN_OPERATION='PHY_PX_MULTI_PART_SSTABLE_INSERT' AND OTHERSTAT_5_VALUE='%ld'", OB_ALL_VIRTUAL_SQL_PLAN_MONITOR_TNAME, tenant_id, trace_id_str, task_id))) { LOG_WARN("failed to assign sql", K(ret)); + } else if 
(OB_FAIL(DDL_SIM(tenant_id, task_id, QUERY_SQL_PLAN_MONITOR_SLOW))) { + LOG_WARN("ddl sim failure: gather insert rows slow", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(insert_res, tenant_id, insert_sql.ptr()))) { LOG_WARN("fail to execute sql", K(ret), K(insert_sql)); } else if (OB_ISNULL(insert_result = insert_res.get_result())) { @@ -1639,6 +1698,7 @@ ObDDLWaitTransEndCtx::~ObDDLWaitTransEndCtx() int ObDDLWaitTransEndCtx::init( const uint64_t tenant_id, + const int64_t ddl_task_id, const uint64_t table_id, const WaitTransType wait_trans_type, const int64_t wait_version) @@ -1648,10 +1708,11 @@ int ObDDLWaitTransEndCtx::init( ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret), K(is_inited_)); } else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id + || ddl_task_id <= 0 || table_id <= 0 || !is_wait_trans_type_valid(wait_trans_type) || wait_version <= 0)) { - LOG_WARN("invalid argument", K(ret), K(tenant_id), K(table_id), K(wait_trans_type), K(wait_version)); + LOG_WARN("invalid argument", K(ret), K(tenant_id), K(ddl_task_id), K(table_id), K(wait_trans_type), K(wait_version)); } else if (OB_FALSE_IT(tablet_ids_.reset())) { } else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, table_id, tablet_ids_))) { LOG_WARN("get table partitions failed", K(ret)); @@ -1666,6 +1727,7 @@ int ObDDLWaitTransEndCtx::init( } if (OB_SUCC(ret)) { tenant_id_ = tenant_id; + ddl_task_id_ = ddl_task_id; table_id_ = table_id; wait_type_ = wait_trans_type; wait_version_ = wait_version; @@ -1686,6 +1748,7 @@ void ObDDLWaitTransEndCtx::reset() wait_version_ = 0; tablet_ids_.reset(); snapshot_array_.reset(); + ddl_task_id_ = 0; } struct SendItem final @@ -1880,8 +1943,11 @@ int ObDDLWaitTransEndCtx::check_schema_trans_end( arg.tenant_id_ = tenant_id; arg.schema_version_ = schema_version; arg.need_wait_trans_end_ = need_wait_trans_end; + arg.ddl_task_id_ = ddl_task_id_; if (OB_FAIL(check_trans_end(send_array, proxy, arg, res, ret_array, snapshot_array, pending_tx_id_))) { 
LOG_WARN("check trans end failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, ddl_task_id_, CHECK_TRANS_END_FAILED))) { + LOG_WARN("ddl sim failure: check trans end failed", K(ret), K(tenant_id_), K(ddl_task_id_)); } } return ret; @@ -1913,8 +1979,11 @@ int ObDDLWaitTransEndCtx::check_sstable_trans_end( obrpc::ObCheckModifyTimeElapsedResult *res = nullptr; arg.tenant_id_ = tenant_id; arg.sstable_exist_ts_ = sstable_exist_ts; + arg.ddl_task_id_ = ddl_task_id_; if (OB_FAIL(check_trans_end(send_array, proxy, arg, res, ret_array, snapshot_array, pending_tx_id_))) { LOG_WARN("check trans end failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, ddl_task_id_, CHECK_TRANS_END_FAILED))) { + LOG_WARN("ddl sim failure: check trans end failed", K(ret), K(tenant_id_), K(ddl_task_id_)); } } return ret; @@ -2004,6 +2073,15 @@ int ObDDLWaitTransEndCtx::try_wait(bool &is_trans_end, int64_t &snapshot_version is_trans_end = is_trans_end_; } } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "ddl wait trans end ctx try_wait", + "tenant_id", tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "wait_type", wait_type_, + "tablet_count", tablet_count, + "snapshot_version", snapshot_version, + is_trans_end_); + LOG_INFO("ddl wait trans end ctx try_wait", K(ret), "ddl_event_info", ObDDLEventInfo(), K(wait_type_)); return ret; } @@ -2030,7 +2108,7 @@ int ObDDLWaitTransEndCtx::get_snapshot(int64_t &snapshot_version) } } if (OB_SUCC(ret)) { - if (OB_FAIL(calc_snapshot_with_gts(tenant_id_, max_snapshot, snapshot_version))) { + if (OB_FAIL(calc_snapshot_with_gts(tenant_id_, ddl_task_id_, max_snapshot, snapshot_version))) { LOG_WARN("calc snapshot with gts failed", K(ret), K(tenant_id_), K(max_snapshot), K(snapshot_version)); } } @@ -2046,6 +2124,7 @@ bool ObDDLWaitTransEndCtx::is_wait_trans_type_valid(const WaitTransType wait_tra int ObDDLWaitTransEndCtx::calc_snapshot_with_gts( const uint64_t tenant_id, + const int64_t ddl_task_id, const int64_t trans_end_snapshot, 
int64_t &snapshot) { @@ -2081,6 +2160,8 @@ int ObDDLWaitTransEndCtx::calc_snapshot_with_gts( if (OB_FAIL(freeze_info_proxy.get_freeze_info( root_service->get_sql_proxy(), SCN::min_scn(), frozen_status))) { LOG_WARN("get freeze info failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, GET_FREEZE_INFO_FAILED))) { + LOG_WARN("ddl sim failure: get freeze info failed", K(ret), K(tenant_id), K(ddl_task_id)); } else { const int64_t frozen_scn_val = frozen_status.frozen_scn_.get_val_for_tx(); snapshot = max(snapshot, frozen_scn_val); @@ -2266,7 +2347,16 @@ int ObDDLWaitColumnChecksumCtx::try_wait(bool &is_column_checksum_ready) } } if (REACH_TIME_INTERVAL(1000L * 1000L)) { - LOG_INFO("try wait checksum", K(ret), K(stat_array_.count()), K(success_count), K(send_succ_count)); + LOG_INFO("try wait checksum", K(ret), K(stat_array_.count()), K(success_count), K(send_succ_count), "ddl_event_info", ObDDLEventInfo()); + if (REACH_TIME_INTERVAL(600 * 1000L * 1000L)) { //10min + ROOTSERVICE_EVENT_ADD("ddl scheduler", "ddl wait column checksum ctx try_wait", + "tenant_id", tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "success_count", success_count, + "send_succ_count", send_succ_count, + "check_count", stat_array_.count()); + } } return ret; } @@ -2292,9 +2382,15 @@ int ObDDLWaitColumnChecksumCtx::update_status(const common::ObTabletID &tablet_i } else if (ObIDDLTask::in_ddl_retry_white_list(ret_code)) { item.col_checksum_stat_ = CCS_NOT_MASTER; } else { - item.col_checksum_stat_ = CCS_FAILED; item.ret_code_ = ret_code; - LOG_WARN("column checksum calc failed", K(ret_code), K(item)); + item.retry_cnt_++; + if (item.retry_cnt_ < ObDDLTask::MAX_ERR_TOLERANCE_CNT) { + item.col_checksum_stat_ = CCS_NOT_MASTER; + LOG_WARN("column checksum calc failed, but retry", K(ret_code), K(item)); + } else { + item.col_checksum_stat_ = CCS_FAILED; + LOG_WARN("column checksum calc failed", K(ret_code), K(item)); + } } } } @@ -2525,6 +2621,10 @@ int 
ObDDLTaskRecordOperator::update_task_status( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET status = %ld WHERE task_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, task_status, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(task_status), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, UPDATE_TASK_RECORD_ON_TASK_STATUS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update status of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2549,6 +2649,10 @@ int ObDDLTaskRecordOperator::update_snapshot_version( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET snapshot_version=%lu WHERE task_id=%lu ", OB_ALL_DDL_TASK_STATUS_TNAME, snapshot_version < 0 ? 
0 : snapshot_version, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(snapshot_version), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, UPDATE_TASK_RECORD_ON_SNAPSHOT_VERSION_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_client.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update snapshot_version of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2573,6 +2677,10 @@ int ObDDLTaskRecordOperator::update_ret_code( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET ret_code=%ld WHERE task_id=%lu ", OB_ALL_DDL_TASK_STATUS_TNAME, ret_code, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(ret_code), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, UPDATE_TASK_RECORD_ON_RET_CODE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_client.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update snapshot_version of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2597,6 +2705,10 @@ int ObDDLTaskRecordOperator::update_execution_id( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET execution_id=%lu WHERE task_id=%lu ", OB_ALL_DDL_TASK_STATUS_TNAME, execution_id, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(execution_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, 
UPDATE_TASK_RECORD_ON_EXECUTION_ID_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_client.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update snapshot_version of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2625,6 +2737,10 @@ int ObDDLTaskRecordOperator::update_message( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET message=\"%.*s\" WHERE task_id=%lu", OB_ALL_DDL_TASK_STATUS_TNAME, static_cast(message_string.length()), message_string.ptr(), task_id))) { LOG_WARN("assign sql string failed", K(ret), K(message_string)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, UPDATE_TASK_RECORD_ON_MESSAGE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update message of ddl task record failed", K(ret), K(sql_string), K(message_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2653,6 +2769,10 @@ int ObDDLTaskRecordOperator::update_status_and_message( } else if (OB_FAIL(sql_string.assign_fmt(" UPDATE %s SET status = %ld, message = \"%.*s\" WHERE task_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, task_status, static_cast(message_string.length()), message_string.ptr(), task_id))) { LOG_WARN("assign sql string failed", K(ret), K(task_status), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, UPDATE_TASK_RECORD_ON_STATUS_AND_MESSAGE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("update 
status of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2673,6 +2793,10 @@ int ObDDLTaskRecordOperator::delete_record(common::ObMySQLProxy &proxy, const ui } else if (OB_FAIL(sql_string.assign_fmt(" DELETE FROM %s WHERE task_id=%lu", OB_ALL_DDL_TASK_STATUS_TNAME, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DELETE_TASK_RECORD_FAILED))) { + LOG_WARN("ddl sim failure: delete task record failed", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy.write(tenant_id, sql_string.ptr(), affected_rows))) { LOG_WARN("delete ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(affected_rows < 0)) { @@ -2803,6 +2927,10 @@ int ObDDLTaskRecordOperator::check_has_conflict_ddl( "UNHEX(ddl_stmt_str) as ddl_stmt_str_unhex, ret_code, UNHEX(message) as message_unhex FROM %s " "WHERE object_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, table_id))) { LOG_WARN("assign sql string failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, QUERY_TASK_RECORD_CHECK_CONFLICT_DDL_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy->read(res, tenant_id, sql_string.ptr()))) { LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); } else if (OB_ISNULL(result = res.get_result())) { @@ -2993,6 +3121,8 @@ int ObDDLTaskRecordOperator::get_ddl_task_record(const uint64_t tenant_id, } else if (task_records.count() != 1) { ret = OB_ERR_UNEXPECTED; LOG_WARN("one task_id but task_records count() != 1", K(ret), K(task_id), K(task_records)); + } else if 
(OB_FAIL(DDL_SIM(record.tenant_id_, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(record.tenant_id_), K(task_id)); } else if (OB_FAIL(task_records.at(0, record))) { LOG_WARN("get task_record failed", K(ret), K(task_id)); } else if (!record.is_valid()) { @@ -3134,6 +3264,8 @@ int ObDDLTaskRecordOperator::insert_record( get_record_id(record.ddl_type_, record.target_object_id_), record.ddl_type_, trace_id_str, record.task_status_, record.task_version_, record.execution_id_, record.ret_code_, static_cast(ddl_stmt_string.length()), ddl_stmt_string.ptr(), static_cast(message_string.length()), message_string.ptr()))) { LOG_WARN("assign sql string failed", K(ret), K(record)); + } else if (OB_FAIL(DDL_SIM(record.tenant_id_, record.task_id_, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(record.tenant_id_), K(record.task_id_)); } else if (OB_FAIL(proxy.write(record.tenant_id_, sql_string.ptr(), affected_rows))) { LOG_WARN("insert ddl task record failed", K(ret), K(sql_string), K(record)); } else if (OB_UNLIKELY(1 != affected_rows)) { @@ -3252,6 +3384,10 @@ int ObDDLTaskRecordOperator::select_for_update( if (OB_FAIL(sql_string.assign_fmt("SELECT status, execution_id FROM %s WHERE task_id = %lu FOR UPDATE", OB_ALL_DDL_TASK_STATUS_TNAME, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(task_id), K(tenant_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, SELECT_TASK_RECORD_FOR_UPDATE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(trans.read(res, tenant_id, sql_string.ptr()))) { LOG_WARN("update status of ddl task record failed", K(ret), K(sql_string)); } else if (OB_UNLIKELY(NULL == (result = res.get_result()))) { @@ -3363,6 +3499,10 @@ int 
ObDDLTaskRecordOperator::kill_task_inner_sql( } LOG_INFO("kill session inner sql", K(sql_string), K(task_id), K(sql_exec_addr)); if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, TASK_STATUS_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure: slow inner sql", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, KILL_TASK_BY_INNER_SQL_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(proxy.read(res, OB_SYS_TENANT_ID, sql_string.ptr(), &sql_exec_addr))) { // default use OB_SYS_TENANT_ID LOG_WARN("query ddl task record failed", K(ret), K(sql_string)); } else if (OB_ISNULL((result = res.get_result()))) { diff --git a/src/rootserver/ddl_task/ob_ddl_task.h b/src/rootserver/ddl_task/ob_ddl_task.h index e6373eefe6..ab145347b9 100755 --- a/src/rootserver/ddl_task/ob_ddl_task.h +++ b/src/rootserver/ddl_task/ob_ddl_task.h @@ -113,13 +113,14 @@ struct ObDDLTaskSerializeField final { OB_UNIS_VERSION(1); public: - TO_STRING_KV(K_(task_version), K_(parallelism), K_(data_format_version), K_(consumer_group_id), K_(is_abort)); - ObDDLTaskSerializeField() : task_version_(0), parallelism_(0), data_format_version_(0), consumer_group_id_(0), is_abort_(false) {} + TO_STRING_KV(K_(task_version), K_(parallelism), K_(data_format_version), K_(consumer_group_id), K_(is_abort), K_(sub_task_trace_id)); + ObDDLTaskSerializeField() : task_version_(0), parallelism_(0), data_format_version_(0), consumer_group_id_(0), is_abort_(false), sub_task_trace_id_(0) {} ObDDLTaskSerializeField(const int64_t task_version, const int64_t parallelism, const int64_t data_format_version, const int64_t consumer_group_id, - const bool is_abort); + const bool is_abort, + const int32_t sub_task_trace_id); ~ObDDLTaskSerializeField() = default; void reset(); public: @@ -128,6 +129,7 @@ public: int64_t data_format_version_; int64_t consumer_group_id_; bool is_abort_; + int32_t sub_task_trace_id_; }; struct ObCreateDDLTaskParam 
final @@ -149,9 +151,10 @@ public: ~ObCreateDDLTaskParam() = default; bool is_valid() const { return OB_INVALID_ID != tenant_id_ && type_ > share::DDL_INVALID && type_ < share::DDL_MAX && nullptr != allocator_; } - TO_STRING_KV(K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), + TO_STRING_KV(K_(sub_task_trace_id), K_(tenant_id), K_(object_id), K_(schema_version), K_(parallelism), K_(consumer_group_id), K_(parent_task_id), K_(task_id), K_(type), KPC_(src_table_schema), KPC_(dest_table_schema), KPC_(ddl_arg)); public: + int32_t sub_task_trace_id_; uint64_t tenant_id_; int64_t object_id_; int64_t schema_version_; @@ -321,6 +324,7 @@ public: ~ObDDLWaitTransEndCtx(); int init( const uint64_t tenant_id, + const int64_t ddl_task_id, const uint64_t table_id, const WaitTransType wait_trans_type, const int64_t wait_version); @@ -329,7 +333,7 @@ public: int try_wait(bool &is_trans_end, int64_t &snapshot_version, const bool need_wait_trans_end = true); transaction::ObTransID get_pending_tx_id() const { return pending_tx_id_; } TO_STRING_KV(K(is_inited_), K_(tenant_id), K(table_id_), K(is_trans_end_), K(wait_type_), - K(wait_version_), K_(pending_tx_id), K(tablet_ids_.count()), K(snapshot_array_.count())); + K(wait_version_), K_(pending_tx_id), K(tablet_ids_.count()), K(snapshot_array_.count()), K(ddl_task_id_)); public: /** @@ -340,6 +344,7 @@ public: */ static int calc_snapshot_with_gts( const uint64_t tenant_id, + const int64_t ddl_task_id, const int64_t trans_end_snapshot, int64_t &snapshot); private: @@ -380,6 +385,7 @@ private: transaction::ObTransID pending_tx_id_; common::ObArray tablet_ids_; common::ObArray snapshot_array_; + int64_t ddl_task_id_; }; class ObDDLTask; @@ -465,7 +471,7 @@ class ObDDLTask : public common::ObDLinkBase public: explicit ObDDLTask(const share::ObDDLType task_type) : lock_(), ddl_tracing_(this), is_inited_(false), need_retry_(true), is_running_(false), is_abort_(false), - 
task_type_(task_type), trace_id_(), tenant_id_(0), dst_tenant_id_(0), object_id_(0), schema_version_(0), dst_schema_version_(0), + task_type_(task_type), trace_id_(), sub_task_trace_id_(0), tenant_id_(0), dst_tenant_id_(0), object_id_(0), schema_version_(0), dst_schema_version_(0), target_object_id_(0), task_status_(share::ObDDLTaskStatus::PREPARE), snapshot_version_(0), ret_code_(OB_SUCCESS), task_id_(0), parent_task_id_(0), parent_task_key_(), task_version_(0), parallelism_(0), allocator_(lib::ObLabel("DdlTask")), compat_mode_(lib::Worker::CompatMode::INVALID), err_code_occurence_cnt_(0), @@ -485,6 +491,9 @@ public: void set_is_abort(const bool is_abort) { is_abort_ = is_abort; } bool get_is_abort() { return is_abort_; } void set_consumer_group_id(const int64_t group_id) { consumer_group_id_ = group_id; } + void set_sub_task_trace_id(const int32_t sub_task_trace_id) { sub_task_trace_id_ = sub_task_trace_id; } + void add_event_info(const ObString &ddl_event_stmt); + void add_event_info(const share::ObDDLTaskStatus status, const uint64_t tenant_id); bool try_set_running() { return !ATOMIC_CAS(&is_running_, false, true); } uint64_t get_tenant_id() const { return dst_tenant_id_; } uint64_t get_object_id() const { return object_id_; } @@ -551,7 +560,7 @@ public: int check_errsim_error(); #endif VIRTUAL_TO_STRING_KV( - K(is_inited_), K(need_retry_), K(is_abort_), K(task_type_), K(trace_id_), + K(is_inited_), K(need_retry_), K(is_abort_), K(task_type_), K(trace_id_), K(sub_task_trace_id_), K(tenant_id_), K(dst_tenant_id_), K(object_id_), K(schema_version_), K(target_object_id_), K(task_status_), K(snapshot_version_), K_(ret_code), K_(task_id), K_(parent_task_id), K_(parent_task_key), @@ -559,6 +568,7 @@ public: K_(sys_task_id), K_(err_code_occurence_cnt), K_(stat_info), K_(next_schedule_ts), K_(delay_schedule_time), K(execution_id_), K(sql_exec_addr_), K_(data_format_version), K(consumer_group_id_), K_(dst_tenant_id), K_(dst_schema_version)); + static const int64_t 
MAX_ERR_TOLERANCE_CNT = 3L; // Max torlerance count for error code. protected: int gather_redefinition_stats(const uint64_t tenant_id, const int64_t task_id, @@ -589,7 +599,6 @@ protected: } int init_ddl_task_monitor_info(const uint64_t target_table_id); protected: - static const int64_t MAX_ERR_TOLERANCE_CNT = 3L; // Max torlerance count for error code. static const int64_t TASK_EXECUTE_TIME_THRESHOLD = 3 * 24 * 60 * 60 * 1000000L; // 3 days common::TCRWLock lock_; ObDDLTracing ddl_tracing_; @@ -599,6 +608,7 @@ protected: bool is_abort_; share::ObDDLType task_type_; TraceId trace_id_; + int32_t sub_task_trace_id_; uint64_t tenant_id_; uint64_t dst_tenant_id_; uint64_t object_id_; @@ -645,7 +655,9 @@ struct PartitionColChecksumStat col_checksum_stat_(CCS_INVALID), snapshot_(-1), execution_id_(-1), - ret_code_(OB_SUCCESS) + ret_code_(common::OB_SUCCESS), + retry_cnt_(0), + table_id_(common::OB_INVALID_ID) {} void reset() { tablet_id_.reset(); @@ -653,6 +665,7 @@ struct PartitionColChecksumStat snapshot_ = -1; execution_id_ = -1; ret_code_ = common::OB_SUCCESS; + retry_cnt_ = 0; table_id_ = common::OB_INVALID_ID; } bool is_valid() const { return tablet_id_.is_valid() && execution_id_ >= 0 && common::OB_INVALID_ID != table_id_; } @@ -660,12 +673,15 @@ struct PartitionColChecksumStat K_(col_checksum_stat), K_(snapshot), K_(execution_id), + K_(ret_code), + K_(retry_cnt), K_(table_id)); ObTabletID tablet_id_; // may be data table, local index or global index ColChecksumStat col_checksum_stat_; int64_t snapshot_; int64_t execution_id_; int ret_code_; + int retry_cnt_; int64_t table_id_; }; diff --git a/src/rootserver/ddl_task/ob_drop_index_task.cpp b/src/rootserver/ddl_task/ob_drop_index_task.cpp index b858b5886f..642ca939c9 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_index_task.cpp @@ -15,6 +15,7 @@ #include "ob_drop_index_task.h" #include "share/schema/ob_multi_version_schema_service.h" #include 
"share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/ob_root_service.h" using namespace oceanbase::rootserver; @@ -42,6 +43,7 @@ int ObDropIndexTask::init( const int64_t schema_version, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObDropIndexArg &drop_index_arg) { int ret = OB_SUCCESS; @@ -64,6 +66,7 @@ int ObDropIndexTask::init( task_id_ = task_id; parent_task_id_ = parent_task_id; consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_version_ = OB_DROP_INDEX_TASK_VERSION; dst_tenant_id_ = tenant_id_; dst_schema_version_ = schema_version_; @@ -148,6 +151,8 @@ int ObDropIndexTask::update_index_status(const ObIndexStatus new_status) DEBUG_SYNC(BEFORE_UPDATE_GLOBAL_INDEX_STATUS); if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, table_id, ddl_rpc_timeout))) { LOG_WARN("get ddl rpc timeout fail", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_INDEX_STATUS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service_->get_common_rpc_proxy().to(GCTX.self_addr()).timeout(ddl_rpc_timeout).update_index_status(arg))) { LOG_WARN("update index status failed", K(ret), K(arg)); } else { @@ -250,6 +255,8 @@ int ObDropIndexTask::drop_index_impl() drop_index_arg.task_id_ = task_id_; if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(index_schema->get_all_part_num() + data_table_schema->get_all_part_num(), ddl_rpc_timeout))) { LOG_WARN("failed to get ddl rpc timeout", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DROP_INDEX_RPC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service_->get_common_rpc_proxy().timeout(ddl_rpc_timeout).drop_index(drop_index_arg, drop_index_res))) { LOG_WARN("drop index failed", K(ret), K(ddl_rpc_timeout)); } @@ -367,6 +374,10 @@ int ObDropIndexTask::process() 
LOG_WARN("error unexpected, task status is not valid", K(ret), K(task_status_)); } ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("drop index task process fail"); + LOG_INFO("drop index task process fail", "ddl_event_info", ObDDLEventInfo()); + } } return ret; } diff --git a/src/rootserver/ddl_task/ob_drop_index_task.h b/src/rootserver/ddl_task/ob_drop_index_task.h index a0a52523e3..d6dcbe5342 100644 --- a/src/rootserver/ddl_task/ob_drop_index_task.h +++ b/src/rootserver/ddl_task/ob_drop_index_task.h @@ -33,6 +33,7 @@ public: const int64_t schema_version, const int64_t parent_task_id, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObDropIndexArg &drop_index_arg); int init(const ObDDLTaskRecord &task_record); virtual int process() override; diff --git a/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp b/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp index ab2846a06b..514fb41b33 100644 --- a/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_primary_key_task.cpp @@ -40,16 +40,18 @@ ObDropPrimaryKeyTask::~ObDropPrimaryKeyTask() int ObDropPrimaryKeyTask::init(const uint64_t tenant_id, const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t schema_version, const int64_t parallelism, - const int64_t consumer_group_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) + const int64_t consumer_group_id, const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, + const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; if (OB_FAIL(ObTableRedefinitionTask::init(tenant_id, tenant_id, task_id, ddl_type, data_table_id, dest_table_id, schema_version, schema_version, parallelism, consumer_group_id, - alter_table_arg, task_status, snapshot_version))) { + sub_task_trace_id, alter_table_arg, 
task_status, snapshot_version))) { LOG_WARN("fail to init ObDropPrimaryKeyTask", K(ret)); } else { set_gmt_create(ObTimeUtility::current_time()); consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_version_ = OB_DROP_PRIMARY_KEY_TASK_VERSION; ddl_tracing_.open(); } @@ -118,6 +120,10 @@ int ObDropPrimaryKeyTask::process() break; } ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("drop primary key task process fail"); + LOG_INFO("drop primary key task process fail", "ddl_event_info", ObDDLEventInfo()); + } } return ret; } diff --git a/src/rootserver/ddl_task/ob_drop_primary_key_task.h b/src/rootserver/ddl_task/ob_drop_primary_key_task.h index ee5524cfc3..e87352aa4c 100644 --- a/src/rootserver/ddl_task/ob_drop_primary_key_task.h +++ b/src/rootserver/ddl_task/ob_drop_primary_key_task.h @@ -36,6 +36,7 @@ public: const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ddl_task/ob_index_build_task.cpp b/src/rootserver/ddl_task/ob_index_build_task.cpp index 16cba78401..fdb3b1e584 100755 --- a/src/rootserver/ddl_task/ob_index_build_task.cpp +++ b/src/rootserver/ddl_task/ob_index_build_task.cpp @@ -18,6 +18,7 @@ #include "share/ob_ddl_error_message_table_operator.h" #include "storage/ddl/ob_ddl_lock.h" #include "share/ob_ddl_common.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/ob_root_service.h" #include "share/scn.h" @@ -76,6 +77,8 @@ int ObIndexSSTableBuildTask::process() } else if (NULL == sys_variable_schema) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sys variable schema is NULL", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, BUILD_REPLICA_ASYNC_TASK_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if 
(OB_FAIL(sys_variable_schema->get_oracle_mode(oracle_mode))) { LOG_WARN("get oracle mode failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, data_table_id_, data_schema))) { @@ -133,11 +136,14 @@ int ObIndexSSTableBuildTask::process() DEBUG_SYNC(BEFORE_INDEX_SSTABLE_BUILD_TASK_SEND_SQL); ObTimeoutCtx timeout_ctx; const int64_t DDL_INNER_SQL_EXECUTE_TIMEOUT = ObDDLUtil::calc_inner_sql_execute_timeout(); - LOG_INFO("execute sql" , K(sql_string), K(data_table_id_), K(tenant_id_), K(DDL_INNER_SQL_EXECUTE_TIMEOUT)); + add_event_info(ret, "index sstable build task send innersql"); + LOG_INFO("execute sql" , K(sql_string), K(data_table_id_), K(tenant_id_), K(DDL_INNER_SQL_EXECUTE_TIMEOUT), "ddl_event_info", ObDDLEventInfo()); if (OB_FAIL(timeout_ctx.set_trx_timeout_us(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { LOG_WARN("set trx timeout failed", K(ret)); } else if (OB_FAIL(timeout_ctx.set_timeout(DDL_INNER_SQL_EXECUTE_TIMEOUT))) { LOG_WARN("set timeout failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, CREATE_INDEX_BUILD_SSTABLE_FAILED))) { + LOG_WARN("ddl sim failure: create index build sstable failed", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(user_sql_proxy->write(tenant_id_, sql_string.ptr(), affected_rows, oracle_mode ? ObCompatibilityMode::ORACLE_MODE : ObCompatibilityMode::MYSQL_MODE, &session_param, sql_exec_addr))) { LOG_WARN("fail to execute build replica sql", K(ret), K(tenant_id_)); @@ -148,7 +154,6 @@ int ObIndexSSTableBuildTask::process() } } - LOG_INFO("build index sstable finish", K(ret), K(*this)); ObDDLTaskKey task_key(tenant_id_, dest_table_id_, schema_version_); ObDDLTaskInfo info; int tmp_ret = root_service_->get_ddl_scheduler().on_sstable_complement_job_reply( @@ -157,9 +162,25 @@ int ObIndexSSTableBuildTask::process() LOG_WARN("report build finish failed", K(ret), K(tmp_ret)); ret = OB_SUCCESS == ret ? 
tmp_ret : ret; } + add_event_info(ret, "index sstable build task finish"); + LOG_INFO("build index sstable finish", K(ret), "ddl_event_info", ObDDLEventInfo(), K(*this), K(sql_string)); return ret; } +void ObIndexSSTableBuildTask::add_event_info(const int ret, const ObString &ddl_event_stmt) +{ + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "data_table_id:%ld, dest_table_id:%ld", + data_table_id_, dest_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", ddl_event_stmt.ptr(), + "tenant_id", tenant_id_, + "ret", ret, + K_(trace_id), + K_(task_id), + "table_id", table_id_buffer, + "sql_exec_addr", inner_sql_exec_addr_); +} + ObAsyncTask *ObIndexSSTableBuildTask::deep_copy(char *buf, const int64_t buf_size) const { ObIndexSSTableBuildTask *task = NULL; @@ -267,6 +288,10 @@ int ObIndexBuildTask::process() } } // end switch ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("index build task process fail"); + LOG_INFO("index build task process fail", "ddl_event_info", ObDDLEventInfo()); + } } return ret; } @@ -325,6 +350,7 @@ int ObIndexBuildTask::init( const int64_t schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg &create_index_arg, const int64_t parent_task_id /* = 0 */, const int64_t task_status /* = TaskStatus::PREPARE */, @@ -375,6 +401,7 @@ int ObIndexBuildTask::init( sstable_complete_ts_ = ObTimeUtility::current_time(); } consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_id_ = task_id; parent_task_id_ = parent_task_id; task_version_ = OB_INDEX_BUILD_TASK_VERSION; @@ -420,6 +447,8 @@ int ObIndexBuildTask::init(const ObDDLTaskRecord &task_record) } else if (!task_record.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(task_record)); + } else if (OB_FAIL(DDL_SIM(task_record.tenant_id_, task_record.task_id_, DDL_TASK_INIT_BY_RECORD_FAILED))) { + 
LOG_WARN("ddl sim failure", K(ret), K(task_record.tenant_id_), K(task_record.task_id_)); /* log ret as well so the failing code is visible */ } else if (OB_FAIL(deserlize_params_from_message(task_record.tenant_id_, task_record.message_.ptr(), task_record.message_.length(), pos))) { LOG_WARN("deserialize params from message failed", K(ret)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( @@ -593,7 +622,7 @@ int ObIndexBuildTask::wait_trans_end() bool is_trans_end = false; int64_t tmp_snapshot_version = 0; if (!wait_trans_ctx_.is_inited() && OB_FAIL(wait_trans_ctx_.init( - tenant_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, schema_version_))) { + tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, schema_version_))) { LOG_WARN("init wait_trans_ctx failed", K(ret), K(object_id_), K(index_table_id_)); } else if (OB_FAIL(wait_trans_ctx_.try_wait(is_trans_end, tmp_snapshot_version))) { LOG_WARN("try wait transaction end failed", K(ret), K(object_id_), K(index_table_id_)); @@ -646,6 +675,8 @@ int ObIndexBuildTask::hold_snapshot(const int64_t snapshot) } else if (snapshot <= 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("snapshot version not valid", K(ret), K(snapshot)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_HOLD_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure: hold snapshot failed", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(snapshot_scn.convert_for_tx(snapshot))) { LOG_WARN("failed to convert", K(snapshot), K(ret)); } else { @@ -692,6 +723,8 @@ int ObIndexBuildTask::release_snapshot(const int64_t snapshot) if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_RELEASE_SNAPSHOT_FAILED))) { + LOG_WARN("ddl sim failure: release snapshot failed", K(ret), K(tenant_id_), K(task_id_)); } else { ObDDLService &ddl_service = root_service_->get_ddl_service(); ObSEArray tablet_ids; @@ -792,6 +825,8 @@ int
ObIndexBuildTask::send_build_single_replica_request() if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObIndexBuildTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_SEND_BUILD_REPLICA_REQUEST_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObDDLTask::push_execution_id(tenant_id_, task_id_, new_execution_id))) { LOG_WARN("failed to fetch new execution id", K(ret)); } else { @@ -910,7 +945,12 @@ int ObIndexBuildTask::wait_data_complement() } if (OB_SUCC(ret) && state_finished && !create_index_arg_.is_spatial_index()) { bool dummy_equal = false; - if (OB_FAIL(ObDDLChecksumOperator::check_column_checksum( + bool need_verify_checksum = true; +#ifdef ERRSIM + // when the major compaction is delayed, skip verify column checksum + need_verify_checksum = 0 == GCONF.errsim_ddl_major_delay_time; +#endif + if (need_verify_checksum && OB_FAIL(ObDDLChecksumOperator::check_column_checksum( tenant_id_, get_execution_id(), object_id_, index_table_id_, task_id_, false/*index build*/, dummy_equal, root_service_->get_sql_proxy()))) { if (OB_ITER_END != ret) { LOG_WARN("fail to check column checksum", K(ret), K(index_table_id_), K(object_id_), K(task_id_)); @@ -1034,7 +1074,7 @@ int ObIndexBuildTask::verify_checksum() bool is_trans_end = false; int64_t tmp_snapshot_version = 0; if (!wait_trans_ctx_.is_inited() && OB_FAIL(wait_trans_ctx_.init( - tenant_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SSTABLE_TRANS, sstable_complete_ts_))) { + tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SSTABLE_TRANS, sstable_complete_ts_))) { LOG_WARN("init wait_trans_ctx failed", K(ret), K(object_id_), K(index_table_id_)); } else if (OB_FAIL(wait_trans_ctx_.try_wait(is_trans_end, tmp_snapshot_version))) { LOG_WARN("try wait transaction end failed", K(ret), K(object_id_), K(index_table_id_)); @@ -1123,6 +1163,8 @@ int 
ObIndexBuildTask::update_complete_sstable_job_status( } else if (OB_UNLIKELY(snapshot_version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(snapshot_version), K(ret_code)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_COMPLETE_SSTABLE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); /* log ret as well so the failing code is visible */ } else if (ObDDLTaskStatus::REDEFINITION != task_status_) { // by pass, may be network delay LOG_INFO("not waiting data complete, may finished", K(task_status_)); @@ -1153,6 +1195,8 @@ int ObIndexBuildTask::enable_index() } else if (ObDDLTaskStatus::TAKE_EFFECT != task_status_) { ret = OB_STATE_NOT_MATCH; LOG_WARN("task status not match", K(ret), K(task_status_)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_TAKE_EFFECT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { share::schema::ObMultiVersionSchemaService &schema_service = root_service_->get_schema_service(); share::schema::ObSchemaGetterGuard schema_guard; @@ -1236,6 +1280,8 @@ int ObIndexBuildTask::update_index_status_in_schema(const ObTableSchema &index_s } else if (OB_FAIL(ObDDLUtil::get_ddl_rpc_timeout(tenant_id_, index_schema.get_data_table_id(), tmp_timeout))) { LOG_WARN("get ddl rpc timeout fail", K(ret)); } else if (OB_FALSE_IT(ddl_rpc_timeout += tmp_timeout)) { + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_INDEX_STATUS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service_->get_common_rpc_proxy().to(GCTX.self_addr()).timeout(ddl_rpc_timeout).update_index_status(arg))) { LOG_WARN("update index status failed", K(ret), K(arg)); } else { @@ -1287,7 +1333,7 @@ int ObIndexBuildTask::clean_on_failed() } else if (ObIndexStatus::INDEX_STATUS_INDEX_ERROR != index_schema->get_index_status()) { state_finished = false; } else if (!wait_trans_ctx_.is_inited() && OB_FAIL(wait_trans_ctx_.init( - tenant_id_, object_id_,
ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, index_schema->get_schema_version()))) { + tenant_id_, task_id_, object_id_, ObDDLWaitTransEndCtx::WaitTransType::WAIT_SCHEMA_TRANS, index_schema->get_schema_version()))) { LOG_WARN("init wait_trans_ctx failed", K(ret), K(object_id_), K(index_table_id_)); } else if (OB_FAIL(wait_trans_ctx_.try_wait(is_trans_end, tmp_snapshot_version))) { LOG_WARN("try wait transaction end failed", K(ret), K(object_id_), K(index_table_id_)); @@ -1501,6 +1547,8 @@ int ObIndexBuildTask::collect_longops_stat(ObLongopsValue &value) break; } if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_COLLECT_LONGOPS_STAT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(copy_longops_stat(value))) { LOG_WARN("failed to collect common longops stat", K(ret)); } diff --git a/src/rootserver/ddl_task/ob_index_build_task.h b/src/rootserver/ddl_task/ob_index_build_task.h index e7a945c651..d5a028fcfe 100644 --- a/src/rootserver/ddl_task/ob_index_build_task.h +++ b/src/rootserver/ddl_task/ob_index_build_task.h @@ -52,6 +52,7 @@ public: virtual int process() override; virtual int64_t get_deep_copy_size() const override { return sizeof(*this); } virtual ObAsyncTask *deep_copy(char *buf, const int64_t buf_size) const override; + void add_event_info(const int ret, const ObString &ddl_event_stmt); TO_STRING_KV(K_(data_table_id), K_(dest_table_id), K_(schema_version), K_(snapshot_version), K_(execution_id), K_(consumer_group_id), K_(trace_id), K_(parallelism), K_(nls_date_format), K_(nls_timestamp_format), K_(nls_timestamp_tz_format)); @@ -90,6 +91,7 @@ public: const int64_t schema_version, const int64_t parallel, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObCreateIndexArg &create_index_arg, const int64_t parent_task_id /* = 0 */, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, diff --git 
a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp index 8ea345dd2f..80c8fff19e 100644 --- a/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp +++ b/src/rootserver/ddl_task/ob_modify_autoinc_task.cpp @@ -15,6 +15,7 @@ #include "rootserver/ob_root_service.h" #include "share/ob_autoincrement_service.h" #include "share/ob_ddl_error_message_table_operator.h" +#include "share/ob_ddl_sim_point.h" #include "storage/tablelock/ob_table_lock_service.h" #include "storage/tablelock/ob_table_lock_rpc_client.h" #include "storage/ddl/ob_ddl_lock.h" @@ -57,6 +58,8 @@ int ObUpdateAutoincSequenceTask::process() ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id_), K(data_table_id_), K(column_id_), K(orig_column_type_), K(dest_table_id_)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_AUTOINC_SEQUENCE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { ObDDLService &ddl_service = root_service->get_ddl_service(); ObMultiVersionSchemaService &schema_service = ddl_service.get_schema_service(); @@ -120,8 +123,19 @@ int ObUpdateAutoincSequenceTask::process() if (OB_SUCCESS != (tmp_ret = root_service->get_ddl_scheduler().notify_update_autoinc_end(task_key, max_value + 1, ret))) { LOG_WARN("fail to finish update autoinc task", K(ret), K(max_value)); } - LOG_INFO("execute finish update autoinc task finish", K(ret), K(task_key), K(data_table_id_), K(column_id_), K(max_value)); + LOG_INFO("execute finish update autoinc task finish", K(ret), "ddl_event_info", ObDDLEventInfo(), K(task_key), K(data_table_id_), K(column_id_), K(max_value)); } + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "data_table_id:%ld, dest_table_id:%ld", + data_table_id_, dest_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "update autoinc sequence task process", + "tenant_id", tenant_id_, + "ret", ret, + K_(trace_id), + K_(task_id), + "table_id", 
table_id_buffer, + K_(schema_version), + column_id_); return ret; } @@ -157,6 +171,7 @@ int ObModifyAutoincTask::init(const uint64_t tenant_id, const int64_t table_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) @@ -180,6 +195,7 @@ int ObModifyAutoincTask::init(const uint64_t tenant_id, target_object_id_ = table_id; schema_version_ = schema_version; consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_status_ = static_cast(task_status); snapshot_version_ = snapshot_version; tenant_id_ = tenant_id; @@ -272,6 +288,10 @@ int ObModifyAutoincTask::process() } } ddl_tracing_.release_span_hierarchy(); + if (OB_FAIL(ret)) { + add_event_info("modify autoinc task process fail"); + LOG_INFO("modify autoinc task process fail", "ddl_event_info", ObDDLEventInfo()); + } } return ret; } @@ -312,6 +332,8 @@ int ObModifyAutoincTask::modify_autoinc() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, MODIFY_AUTOINC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(check_update_autoinc_end(is_update_autoinc_end))) { LOG_WARN("fail to check update autoinc end", K(ret)); } else if (!is_update_autoinc_end && update_autoinc_job_time_ == 0) { @@ -408,6 +430,7 @@ int ObModifyAutoincTask::wait_trans_end() ret = OB_TABLE_NOT_EXIST; LOG_WARN("cannot find orig table", K(ret), K(alter_table_arg_)); } else if (OB_FAIL(wait_trans_ctx_.init(tenant_id_, + task_id_, object_id_, ObDDLWaitTransEndCtx::WAIT_SCHEMA_TRANS, updated_table_schema->get_schema_version()))) { @@ -448,6 +471,8 @@ int ObModifyAutoincTask::set_schema_available() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", 
K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_TAKE_EFFECT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { ObSArray unused_ids; alter_table_arg_.ddl_task_type_ = share::UPDATE_AUTOINC_SCHEMA; diff --git a/src/rootserver/ddl_task/ob_modify_autoinc_task.h b/src/rootserver/ddl_task/ob_modify_autoinc_task.h index 22c7f230eb..2d3a6644d1 100644 --- a/src/rootserver/ddl_task/ob_modify_autoinc_task.h +++ b/src/rootserver/ddl_task/ob_modify_autoinc_task.h @@ -59,6 +59,7 @@ public: const int64_t table_id, const int64_t schema_version, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status = share::ObDDLTaskStatus::MODIFY_AUTOINC, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp b/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp index 07f8b8c11b..2968afee1a 100755 --- a/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp +++ b/src/rootserver/ddl_task/ob_recover_restore_table_task.cpp @@ -39,7 +39,7 @@ ObRecoverRestoreTableTask::~ObRecoverRestoreTableTask() int ObRecoverRestoreTableTask::init(const uint64_t src_tenant_id, const uint64_t dst_tenant_id, const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t src_schema_version, - const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t sub_task_trace_id, const ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; @@ -50,7 +50,7 @@ int ObRecoverRestoreTableTask::init(const uint64_t src_tenant_id, const uint64_t ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(ddl_type), K(src_tenant_id), K(data_table_id)); } else if 
(OB_FAIL(ObTableRedefinitionTask::init(src_tenant_id, dst_tenant_id, task_id, ddl_type, data_table_id, - dest_table_id, src_schema_version, dst_schema_version, parallelism, consumer_group_id, alter_table_arg, task_status, 0/*snapshot*/))) { + dest_table_id, src_schema_version, dst_schema_version, parallelism, consumer_group_id, sub_task_trace_id, alter_table_arg, task_status, 0/*snapshot*/))) { LOG_WARN("fail to init ObDropPrimaryKeyTask", K(ret)); } else { execution_id_ = 1L; @@ -92,7 +92,7 @@ int ObRecoverRestoreTableTask::obtain_snapshot(const ObDDLTaskStatus next_task_s LOG_WARN("error sys, root service must not be nullptr", K(ret)); } else if (snapshot_version_ > 0) { // do nothing, already hold snapshot. - } else if (OB_FAIL(ObDDLWaitTransEndCtx::calc_snapshot_with_gts(dst_tenant_id_, 0/*trans_end_snapshot*/, snapshot_version_))) { + } else if (OB_FAIL(ObDDLWaitTransEndCtx::calc_snapshot_with_gts(dst_tenant_id_, task_id_, 0/*trans_end_snapshot*/, snapshot_version_))) { // fetch snapshot. 
LOG_WARN("calc snapshot with gts failed", K(ret), K(dst_tenant_id_)); } else if (snapshot_version_ <= 0) { @@ -322,4 +322,4 @@ int ObRecoverRestoreTableTask::process() ddl_tracing_.release_span_hierarchy(); } return ret; -} \ No newline at end of file +} diff --git a/src/rootserver/ddl_task/ob_recover_restore_table_task.h b/src/rootserver/ddl_task/ob_recover_restore_table_task.h index a5125f2e57..4e364e9231 100644 --- a/src/rootserver/ddl_task/ob_recover_restore_table_task.h +++ b/src/rootserver/ddl_task/ob_recover_restore_table_task.h @@ -42,6 +42,7 @@ public: const int64_t dest_schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp index c19c3ae5c2..0f2c3364b3 100755 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.cpp +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.cpp @@ -17,6 +17,7 @@ #include "share/ob_ddl_error_message_table_operator.h" #include "share/ob_autoincrement_service.h" #include "share/ob_ddl_checksum.h" +#include "share/ob_ddl_sim_point.h" #include "rootserver/ddl_task/ob_ddl_scheduler.h" #include "rootserver/ob_root_service.h" #include "rootserver/ddl_task/ob_ddl_redefinition_task.h" @@ -48,7 +49,7 @@ ObTableRedefinitionTask::~ObTableRedefinitionTask() int ObTableRedefinitionTask::init(const uint64_t src_tenant_id, const uint64_t dst_tenant_id, const int64_t task_id, const share::ObDDLType &ddl_type, const int64_t data_table_id, const int64_t dest_table_id, const int64_t src_schema_version, - const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int64_t dst_schema_version, const int64_t parallelism, const int64_t consumer_group_id, const int32_t 
sub_task_trace_id, const ObAlterTableArg &alter_table_arg, const int64_t task_status, const int64_t snapshot_version) { int ret = OB_SUCCESS; @@ -74,6 +75,7 @@ int ObTableRedefinitionTask::init(const uint64_t src_tenant_id, const uint64_t d } else { set_gmt_create(ObTimeUtility::current_time()); consumer_group_id_ = consumer_group_id; + sub_task_trace_id_ = sub_task_trace_id; task_type_ = ddl_type; object_id_ = data_table_id; target_object_id_ = dest_table_id; @@ -182,6 +184,8 @@ int ObTableRedefinitionTask::update_complete_sstable_job_status(const common::Ob if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObTableRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UPDATE_COMPLETE_SSTABLE_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else if (ObDDLTaskStatus::CHECK_TABLE_EMPTY == task_status_) { check_table_empty_job_ret_code_ = ret_code; } else { @@ -241,6 +245,8 @@ int ObTableRedefinitionTask::send_build_replica_request_by_sql() if (OB_ISNULL(root_service)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("error unexpected, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_SEND_BUILD_REPLICA_REQUEST_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else if (OB_FAIL(check_modify_autoinc(modify_autoinc))) { LOG_WARN("failed to check modify autoinc", K(ret)); } else if (OB_FAIL(check_use_heap_table_ddl_plan(use_heap_table_ddl_plan))) { @@ -323,6 +329,8 @@ int ObTableRedefinitionTask::check_use_heap_table_ddl_plan(bool &use_heap_table_ if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, TABLE_REDEF_TASK_CHECK_USE_HEAP_PLAN_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else if (OB_FAIL(root_service->get_ddl_service() 
.get_tenant_schema_guard_with_version_in_inner_table(dst_tenant_id_, schema_guard))) { LOG_WARN("get schema guard failed", K(ret)); @@ -378,7 +386,12 @@ int ObTableRedefinitionTask::table_redefinition(const ObDDLTaskStatus next_task_ // overwrite ret if (is_build_replica_end) { ret = OB_SUCC(ret) ? complete_sstable_job_ret_code_ : ret; - if (OB_SUCC(ret)) { + bool need_verify_checksum = true; +#ifdef ERRSIM + // when the major compaction is delayed, skip verify column checksum + need_verify_checksum = 0 == GCONF.errsim_ddl_major_delay_time; +#endif + if (OB_SUCC(ret) && need_verify_checksum) { if (OB_FAIL(replica_end_check(ret))) { LOG_WARN("fail to check", K(ret)); } @@ -418,6 +431,8 @@ int ObTableRedefinitionTask::copy_table_indexes() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_INDEX_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else { const int64_t MAX_ACTIVE_TASK_CNT = 1; int64_t active_task_cnt = 0; @@ -480,6 +495,9 @@ int ObTableRedefinitionTask::copy_table_indexes() ObDDLTaskRecord task_record; bool need_rebuild_index = true; SMART_VAR(ObCreateIndexArg, create_index_arg) { + ObTraceIdGuard trace_id_guard(get_trace_id()); + ATOMIC_INC(&sub_task_trace_id_); + ObDDLEventInfo ddl_event_info(sub_task_trace_id_); // this create index arg is not valid, only has nls format create_index_arg.nls_date_format_ = alter_table_arg_.nls_formats_[0]; create_index_arg.nls_timestamp_format_ = alter_table_arg_.nls_formats_[1]; @@ -508,6 +526,7 @@ int ObTableRedefinitionTask::copy_table_indexes() &allocator_, &create_index_arg, task_id_); + param.sub_task_trace_id_ = sub_task_trace_id_; if (OB_FAIL(GCTX.root_service_->get_ddl_task_scheduler().create_ddl_task(param, *GCTX.sql_proxy_, task_record))) { if (OB_ENTRY_EXIST == ret) { ret = OB_SUCCESS; @@ -520,7 +539,10 @@ int 
ObTableRedefinitionTask::copy_table_indexes() LOG_WARN("fail to schedule ddl task", K(ret), K(task_record)); } } - if (OB_SUCC(ret) && need_rebuild_index) { + if (OB_FAIL(ret)) { + add_event_info("create table_redefinition index task fail"); + LOG_WARN("add build index task failed", K(ret), K(task_record), K(ddl_event_info)); + } else if (need_rebuild_index) { TCWLockGuard guard(lock_); const uint64_t task_key = index_ids.at(i); DependTaskStatus status; @@ -532,7 +554,8 @@ int ObTableRedefinitionTask::copy_table_indexes() LOG_WARN("set dependent task map failed", K(ret), K(task_key)); } } - LOG_INFO("add build index task", K(ret), K(task_key), K(status)); + add_event_info("create table_redefinition index task succ"); + LOG_INFO("add build index task", K(ret), K(task_key), K(status), K(ddl_event_info)); } } } @@ -557,6 +580,8 @@ int ObTableRedefinitionTask::copy_table_constraints() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_CONSTRAINT_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else { if (has_rebuild_constraint_) { // do nothing @@ -624,6 +649,8 @@ int ObTableRedefinitionTask::copy_table_foreign_keys() } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_FOREIGN_KEY_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else { if (has_rebuild_foreign_key_) { // do nothing @@ -687,6 +714,8 @@ int ObTableRedefinitionTask::copy_table_dependent_objects(const ObDDLTaskStatus } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, REDEF_TASK_COPY_DEPENDENT_OBJECTS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), 
K(task_id_)); } else if (!dependent_task_result_map_.created() && OB_FAIL(dependent_task_result_map_.create(MAX_DEPEND_OBJECT_COUNT, lib::ObLabel("DepTasMap")))) { LOG_WARN("create dependent task map failed", K(ret)); } else { @@ -762,7 +791,7 @@ int ObTableRedefinitionTask::take_effect(const ObDDLTaskStatus next_task_status) { int ret = OB_SUCCESS; #ifdef ERRSIM - SERVER_EVENT_ADD("ddl_task", "before_table_redefinition_task_effect", + ROOTSERVICE_EVENT_ADD("ddl_task", "before_table_redefinition_task_effect", "tenant_id", tenant_id_, "object_id", object_id_, "target_object_id", target_object_id_); @@ -788,6 +817,8 @@ int ObTableRedefinitionTask::take_effect(const ObDDLTaskStatus next_task_status) } else if (OB_ISNULL(root_service)) { ret = OB_ERR_SYS; LOG_WARN("error sys, root service must not be nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_TAKE_EFFECT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard(dst_tenant_id_, schema_guard))) { LOG_WARN("get tenant schema guard failed", K(ret)); } else if (OB_FAIL(schema_guard.get_table_schema(dst_tenant_id_, target_object_id_, table_schema))) { @@ -831,6 +862,18 @@ int ObTableRedefinitionTask::take_effect(const ObDDLTaskStatus next_task_status) LOG_WARN("fail to switch status", K(ret)); } } + char object_id_buffer[256]; + snprintf(object_id_buffer, sizeof(object_id_buffer), "object_id:%ld, target_object_id:%ld", + object_id_, target_object_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "table redefinition task take effect", + "tenant_id", tenant_id_, + "ret", ret, + K_(trace_id), + K_(task_id), + "object_id", object_id_buffer, + K_(schema_version), + next_task_status); + LOG_INFO("table redefinition task take effect", K(ret), "ddl_event_info", ObDDLEventInfo(), K(*this)); return ret; } @@ -840,6 +883,8 @@ int ObTableRedefinitionTask::repending(const share::ObDDLTaskStatus 
next_task_st if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObDDLRedefinitionTask has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, TABLE_REDEF_TASK_REPENDING_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id_), K(task_id_)); } else { switch (task_type_) { case DDL_DIRECT_LOAD: @@ -1241,6 +1286,8 @@ int ObTableRedefinitionTask::collect_longops_stat(ObLongopsValue &value) } if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, DDL_TASK_COLLECT_LONGOPS_STAT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(copy_longops_stat(value))) { LOG_WARN("failed to collect common longops stat", K(ret)); } @@ -1317,6 +1364,10 @@ int ObTableRedefinitionTask::get_direct_load_job_stat(common::ObArenaAllocator & "AND JOB_ID=%ld AND JOB_TYPE='direct' AND COORDINATOR_STATUS!='none'", OB_ALL_VIRTUAL_LOAD_DATA_STAT_TNAME, tenant_id_, object_id_))) { LOG_WARN("failed to assign sql", KR(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, TABLE_REDEF_TASK_GET_DIRECT_LOAD_JOB_STAT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, TABLE_REDEF_TASK_GET_DIRECT_LOAD_JOB_STAT_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(sql_proxy.read(select_res, OB_SYS_TENANT_ID, select_sql.ptr()))) { LOG_WARN("fail to execute sql", KR(ret), K(select_sql)); } else if (OB_ISNULL(select_result = select_res.get_result())) { diff --git a/src/rootserver/ddl_task/ob_table_redefinition_task.h b/src/rootserver/ddl_task/ob_table_redefinition_task.h index f2fdfc1022..bb0f7a8c21 100644 --- a/src/rootserver/ddl_task/ob_table_redefinition_task.h +++ b/src/rootserver/ddl_task/ob_table_redefinition_task.h @@ -42,6 +42,7 @@ public: const int64_t dest_schema_version, const int64_t parallelism, const int64_t consumer_group_id, + const int32_t sub_task_trace_id, const 
obrpc::ObAlterTableArg &alter_table_arg, const int64_t task_status = share::ObDDLTaskStatus::PREPARE, const int64_t snapshot_version = 0); diff --git a/src/rootserver/ob_root_service.cpp b/src/rootserver/ob_root_service.cpp index 644ea3b60a..91359a6f27 100755 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -85,6 +85,7 @@ #include "rootserver/ob_ddl_sql_generator.h" #include "rootserver/ddl_task/ob_ddl_task.h" #include "rootserver/ddl_task/ob_constraint_task.h" +#include "share/ob_ddl_sim_point.h" #include "storage/ob_file_system_router.h" #include "storage/tx/ob_ts_mgr.h" #include "lib/stat/ob_diagnose_info.h" @@ -1610,7 +1611,11 @@ int ObRootService::schedule_load_ddl_task() { int ret = OB_SUCCESS; const bool did_repeat = false; +#ifdef ERRSIM + const int64_t delay = 1000L * 1000L; //1s +#else const int64_t delay = 5L * 1000L * 1000L; //5s +#endif if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); @@ -3031,9 +3036,13 @@ int ObRootService::parallel_create_table(const ObCreateTableArg &arg, ObCreateTa } int64_t cost = ObTimeUtility::current_time() - begin_time; LOG_TRACE("finish create table", KR(ret), K(arg), K(cost)); - ROOTSERVICE_EVENT_ADD("ddl", "parallel_create_table", - K(ret), K(tenant_id), - "table_id", res.table_id_, K(cost)); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "parallel create table", + K(tenant_id), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "table_id", res.table_id_, + "schema_version", res.schema_version_, + K(cost)); return ret; } @@ -3728,7 +3737,14 @@ int ObRootService::create_table(const ObCreateTableArg &arg, ObCreateTableRes &r RS_TRACE(create_table_end); FORCE_PRINT_TRACE(THE_RS_TRACE, "[create table]"); int64_t cost = ObTimeUtility::current_time() - begin_time; - ROOTSERVICE_EVENT_ADD("ddl", "create_table", K(ret), "table_id", res.table_id_, K(cost)); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "create table", + "tenant_id", arg.schema_.get_tenant_id(), + 
"ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "table_id", res.table_id_, + "schema_version", res.schema_version_, + K(cost)); + LOG_INFO("finish create table ddl", K(ret), K(cost), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -3994,9 +4010,24 @@ int ObRootService::create_hidden_table(const obrpc::ObCreateHiddenTableArg &arg, } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, CREATE_HIDDEN_TABLE_RPC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, CREATE_HIDDEN_TABLE_RPC_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); } else if (OB_FAIL(ddl_service_.create_hidden_table(arg, res))) { LOG_WARN("do create hidden table in trans failed", K(ret), K(arg)); } + char tenant_id_buffer[128]; + snprintf(tenant_id_buffer, sizeof(tenant_id_buffer), "orig_tenant_id:%ld, target_tenant_id:%ld", + arg.tenant_id_, arg.dest_tenant_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "create hidden table", + "tenant_id", tenant_id_buffer, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", arg.table_id_, + "schema_version", res.schema_version_); + LOG_INFO("finish create hidden table ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4042,9 +4073,19 @@ int ObRootService::abort_redef_table(const obrpc::ObAbortRedefTableArg &arg) } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, ABORT_REDEF_TABLE_RPC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, ABORT_REDEF_TABLE_RPC_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); } else if (OB_FAIL(ddl_scheduler_.abort_redef_table(ObDDLTaskID(tenant_id, task_id)))) { 
LOG_WARN("cancel task failed", K(ret), K(tenant_id), K(task_id)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "abort redef table", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.task_id_); + LOG_INFO("finish abort redef table ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4066,9 +4107,19 @@ int ObRootService::finish_redef_table(const obrpc::ObFinishRedefTableArg &arg) } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, FINISH_REDEF_TABLE_RPC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, FINISH_REDEF_TABLE_RPC_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); } else if (OB_FAIL(ddl_scheduler_.finish_redef_table(ObDDLTaskID(tenant_id, task_id)))) { LOG_WARN("failed to finish redef table", K(ret), K(task_id), K(tenant_id)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "finish redef table", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.task_id_); + LOG_INFO("finish redef table ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4095,6 +4146,10 @@ int ObRootService::copy_table_dependents(const obrpc::ObCopyTableDependentsArg & } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, COPY_TABLE_DEPENDENTS_RPC_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, COPY_TABLE_DEPENDENTS_RPC_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(arg)); } else if (OB_FAIL(ddl_scheduler_.copy_table_dependents(ObDDLTaskID(tenant_id, task_id), is_copy_constraints, is_copy_indexes, @@ -4103,6 +4158,12 @@ int
ObRootService::copy_table_dependents(const obrpc::ObCopyTableDependentsArg & is_ignore_errors))) { LOG_WARN("failed to copy table dependents", K(ret), K(arg)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "copy table dependents", + "tenant_id", tenant_id, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id); + LOG_INFO("finish copy table dependents ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4126,6 +4187,20 @@ int ObRootService::start_redef_table(const obrpc::ObStartRedefTableArg &arg, obr } else if (OB_FAIL(ddl_scheduler_.start_redef_table(arg, res))) { LOG_WARN("start redef table failed", K(ret)); } + char tenant_id_buffer[128]; + snprintf(tenant_id_buffer, sizeof(tenant_id_buffer), "orig_tenant_id:%ld, target_tenant_id:%ld", + arg.orig_tenant_id_, arg.target_tenant_id_); + char table_id_buffer[128]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "orig_table_id:%ld, target_table_id:%ld", + arg.orig_table_id_, arg.target_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "redef table", + "tenant_id", tenant_id_buffer, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", table_id_buffer, + "schema_version", res.schema_version_); + LOG_INFO("finish redef table ddl", K(arg), K(ret), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4250,6 +4325,17 @@ int ObRootService::alter_table(const obrpc::ObAlterTableArg &arg, obrpc::ObAlter } } } + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "table_id:%ld, hidden_table_id:%ld", + arg.table_id_, arg.hidden_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "alter table", + K(tenant_id), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", table_id_buffer, + "schema_version", res.schema_version_); + LOG_INFO("finish alter table ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4274,6 
+4360,17 @@ int ObRootService::create_index(const ObCreateIndexArg &arg, obrpc::ObAlterTable LOG_WARN("create_index failed", K(arg), K(ret)); } } + char table_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "data_table_id:%ld, index_table_id:%ld", + arg.data_table_id_, arg.index_table_id_); + ROOTSERVICE_EVENT_ADD("ddl scheduler", "create index", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", table_id_buffer, + "schema_version", res.schema_version_); + LOG_INFO("finish create index ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4356,6 +4453,14 @@ int ObRootService::drop_table(const obrpc::ObDropTableArg &arg, obrpc::ObDDLRes } else if (OB_FAIL(ddl_service_.drop_table(arg, res))) { LOG_WARN("ddl service failed to drop table", K(ret), K(arg), K(res)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "drop table", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "session_id", arg.session_id_, + "schema_version", res.schema_id_); + LOG_INFO("finish drop table ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4488,6 +4593,14 @@ int ObRootService::drop_index(const obrpc::ObDropIndexArg &arg, obrpc::ObDropInd LOG_WARN("index_builder drop_index failed", K(arg), K(ret)); } } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "drop index", + "tenant_id", res.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", arg.index_table_id_, + "schema_version", res.schema_version_); + LOG_INFO("finish drop index ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4503,6 +4616,14 @@ int ObRootService::rebuild_index(const obrpc::ObRebuildIndexArg &arg, obrpc::ObA } else if (OB_FAIL(ddl_service_.rebuild_index(arg, res))) { LOG_WARN("ddl_service rebuild index failed", K(arg), K(ret)); } 
+ ROOTSERVICE_EVENT_ADD("ddl scheduler", "rebuild index", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", arg.index_table_id_, + "schema_version", res.schema_version_); + LOG_INFO("finish rebuild index ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4606,6 +4727,14 @@ int ObRootService::truncate_table(const obrpc::ObTruncateTableArg &arg, obrpc::O LOG_WARN("ddl service failed to truncate table", K(arg), K(ret), K(frozen_scn)); } } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "truncate table", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_name", arg.table_name_, + "schema_version", res.schema_id_); + LOG_INFO("finish truncate table ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -4628,6 +4757,15 @@ int ObRootService::truncate_table_v2(const obrpc::ObTruncateTableArg &arg, obrpc } else if (OB_FAIL(ddl_service_.new_truncate_table(arg, res, frozen_scn))) { LOG_WARN("ddl service failed to truncate table", K(arg), K(ret)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "truncate table new", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_name", arg.table_name_, + "schema_version", res.schema_id_, + frozen_scn); + LOG_INFO("finish new truncate table ddl", K(ret), K(arg), K(res), "ddl_event_info", ObDDLEventInfo()); } return ret; } @@ -4832,6 +4970,8 @@ int ObRootService::calc_column_checksum_repsonse(const obrpc::ObCalcColumnChecks } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, PROCESS_COLUMN_CHECKSUM_RESPONSE_SLOW))) { + LOG_WARN("ddl sim failure: process column checksum response slow", K(ret)); } else if
(OB_FAIL(ddl_scheduler_.on_column_checksum_calc_reply( arg.tablet_id_, ObDDLTaskKey(arg.tenant_id_, arg.target_table_id_, arg.schema_version_), arg.ret_code_))) { LOG_WARN("handle column checksum calc response failed", K(ret), K(arg)); @@ -8712,8 +8852,13 @@ int ObRootService::upgrade_table_schema(const obrpc::ObUpgradeTableSchemaArg &ar } } } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "update table schema", + "tenant_id", arg.get_tenant_id(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "table_id", arg.get_table_id()); FLOG_INFO("[UPGRADE] finish upgrade table", KR(ret), K(arg), - "cost_us", ObTimeUtility::current_time() - start); + "cost_us", ObTimeUtility::current_time() - start, "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -10150,10 +10295,21 @@ int ObRootService::build_ddl_single_replica_response(const obrpc::ObDDLBuildSing } else if (OB_UNLIKELY(!arg.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(arg)); + } else if (OB_FAIL(DDL_SIM(arg.tenant_id_, arg.task_id_, PROCESS_BUILD_SSTABLE_RESPONSE_SLOW))) { + LOG_WARN("ddl sim failure: process build sstable response slow", K(ret)); } else if (OB_FAIL(ddl_scheduler_.on_sstable_complement_job_reply( arg.tablet_id_/*source tablet id*/, ObDDLTaskKey(arg.dest_tenant_id_, arg.dest_schema_id_, arg.dest_schema_version_), arg.snapshot_version_, arg.execution_id_, arg.ret_code_, info))) { LOG_WARN("handle column checksum calc response failed", K(ret), K(arg)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "build ddl single replica response", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.task_id_, + "tablet_id_", arg.tablet_id_, + "snapshot_version_", arg.snapshot_version_, + arg.source_table_id_); + LOG_INFO("finish build ddl single replica response ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -10369,6 +10525,12 @@ int ObRootService::cancel_ddl_task(const ObCancelDDLTaskArg &arg)
} else { LOG_INFO("succeed to cancel ddl task", K(arg)); } + ROOTSERVICE_EVENT_ADD("ddl scheduler", "cancel ddl task", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.get_task_id()); + LOG_INFO("finish cancel ddl task ddl", K(ret), K(arg), "ddl_event_info", ObDDLEventInfo()); return ret; } diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index 834b771594..c599ab42a5 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -88,6 +88,7 @@ ob_set_subtarget(ob_share common ob_column_checksum_error_operator.cpp ob_core_table_proxy.cpp ob_ddl_checksum.cpp + ob_ddl_sim_point.cpp ob_ddl_common.cpp ob_ddl_error_message_table_operator.cpp ob_ddl_task_executor.cpp diff --git a/src/share/inner_table/ob_inner_table_schema.11101_11150.cpp b/src/share/inner_table/ob_inner_table_schema.11101_11150.cpp index 59a8549396..bd9648dde4 100644 --- a/src/share/inner_table/ob_inner_table_schema.11101_11150.cpp +++ b/src/share/inner_table/ob_inner_table_schema.11101_11150.cpp @@ -3795,6 +3795,254 @@ int ObInnerTableSchema::all_virtual_tablet_stat_schema(ObTableSchema &table_sche return ret; } +int ObInnerTableSchema::all_virtual_ddl_sim_point_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_DDL_SIM_POINT_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if 
(OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_DDL_SIM_POINT_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sim_point_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sim_point_name", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + 1024, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sim_point_description", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_CHAR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sim_point_action", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_CHAR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + 
table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_ddl_sim_point_stat_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + 
-1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ddl_task_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("sim_point_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("trigger_count", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + 
table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index 7f5c6b8d34..dbc59cf7cc 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -645,6 +645,8 @@ public: static int all_virtual_io_quota_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_server_compaction_event_history_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_tablet_stat_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_ddl_sim_point_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_ddl_sim_point_stat_schema(share::schema::ObTableSchema &table_schema); static int session_variables_schema(share::schema::ObTableSchema &table_schema); static int global_status_schema(share::schema::ObTableSchema &table_schema); static int session_status_schema(share::schema::ObTableSchema &table_schema); @@ -3081,6 +3083,8 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_io_quota_schema, ObInnerTableSchema::all_virtual_server_compaction_event_history_schema, ObInnerTableSchema::all_virtual_tablet_stat_schema, + 
ObInnerTableSchema::all_virtual_ddl_sim_point_schema, + ObInnerTableSchema::all_virtual_ddl_sim_point_stat_schema, ObInnerTableSchema::session_variables_schema, ObInnerTableSchema::global_status_schema, ObInnerTableSchema::session_status_schema, @@ -9024,6 +9028,7 @@ const uint64_t cluster_distributed_vtables [] = { OB_ALL_VIRTUAL_IO_CALIBRATION_STATUS_TID, OB_ALL_VIRTUAL_IO_BENCHMARK_TID, OB_ALL_VIRTUAL_IO_QUOTA_TID, + OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TID, OB_ALL_VIRTUAL_TENANT_MEMSTORE_ALLOCATOR_INFO_TID, OB_ALL_VIRTUAL_BAD_BLOCK_TABLE_TID, OB_ALL_VIRTUAL_TABLET_STORE_STAT_TID, @@ -11618,11 +11623,11 @@ static inline int get_sys_table_lob_aux_schema(const uint64_t tid, const int64_t OB_CORE_TABLE_COUNT = 4; const int64_t OB_SYS_TABLE_COUNT = 261; -const int64_t OB_VIRTUAL_TABLE_COUNT = 740; +const int64_t OB_VIRTUAL_TABLE_COUNT = 742; const int64_t OB_SYS_VIEW_COUNT = 788; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 1794; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 1796; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1797; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1799; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index adfa22fb8f..679c4a0b70 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -381,6 +381,8 @@ const uint64_t OB_ALL_VIRTUAL_IO_BENCHMARK_TID = 11114; // "__all_virtual_io_ben const uint64_t OB_ALL_VIRTUAL_IO_QUOTA_TID = 11115; // "__all_virtual_io_quota" const uint64_t OB_ALL_VIRTUAL_SERVER_COMPACTION_EVENT_HISTORY_TID = 11116; // "__all_virtual_server_compaction_event_history" const uint64_t OB_ALL_VIRTUAL_TABLET_STAT_TID = 11117; // "__all_virtual_tablet_stat" +const uint64_t OB_ALL_VIRTUAL_DDL_SIM_POINT_TID = 11118; // "__all_virtual_ddl_sim_point" +const uint64_t 
OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TID = 11119; // "__all_virtual_ddl_sim_point_stat" const uint64_t OB_SESSION_VARIABLES_TID = 12001; // "SESSION_VARIABLES" const uint64_t OB_GLOBAL_STATUS_TID = 12006; // "GLOBAL_STATUS" const uint64_t OB_SESSION_STATUS_TID = 12008; // "SESSION_STATUS" @@ -2801,6 +2803,8 @@ const char *const OB_ALL_VIRTUAL_IO_BENCHMARK_TNAME = "__all_virtual_io_benchmar const char *const OB_ALL_VIRTUAL_IO_QUOTA_TNAME = "__all_virtual_io_quota"; const char *const OB_ALL_VIRTUAL_SERVER_COMPACTION_EVENT_HISTORY_TNAME = "__all_virtual_server_compaction_event_history"; const char *const OB_ALL_VIRTUAL_TABLET_STAT_TNAME = "__all_virtual_tablet_stat"; +const char *const OB_ALL_VIRTUAL_DDL_SIM_POINT_TNAME = "__all_virtual_ddl_sim_point"; +const char *const OB_ALL_VIRTUAL_DDL_SIM_POINT_STAT_TNAME = "__all_virtual_ddl_sim_point_stat"; const char *const OB_SESSION_VARIABLES_TNAME = "SESSION_VARIABLES"; const char *const OB_GLOBAL_STATUS_TNAME = "GLOBAL_STATUS"; const char *const OB_SESSION_STATUS_TNAME = "SESSION_STATUS"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index d4b5ee1ff0..130e48e3f6 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -9062,6 +9062,43 @@ def_table_schema( # table_id = 11118: used for __all_virtual_ddl_sim_point on enhance_ddl_quality branch # table_id = 11119: used for __all_virtual_ddl_sim_point_stat on enhance_ddl_quality branch +def_table_schema( + owner = 'jianyun.sjy', + table_name = '__all_virtual_ddl_sim_point', + table_id = '11118', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + rowkey_columns = [], + normal_columns = [ + ('sim_point_id', 'int'), + ('sim_point_name', 'varchar:1024'), + ('sim_point_description', 'varchar:OB_MAX_CHAR_LENGTH'), + ('sim_point_action', 'varchar:OB_MAX_CHAR_LENGTH'), + ], + vtable_route_policy = 'local', +) + +def_table_schema( + owner = 
'jianyun.sjy', + table_name = '__all_virtual_ddl_sim_point_stat', + table_id = '11119', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + rowkey_columns = [], + normal_columns = [ + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('tenant_id', 'int'), + ('ddl_task_id', 'int'), + ('sim_point_id', 'int'), + ('trigger_count', 'int'), + ], + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) + + + ################################################################ ################################################################ # INFORMATION SCHEMA diff --git a/src/share/ob_ddl_checksum.cpp b/src/share/ob_ddl_checksum.cpp index 34e0cac9ca..5f445eb967 100644 --- a/src/share/ob_ddl_checksum.cpp +++ b/src/share/ob_ddl_checksum.cpp @@ -17,6 +17,7 @@ #include "share/inner_table/ob_inner_table_schema.h" #include "share/schema/ob_schema_utils.h" #include "share/schema/ob_multi_version_schema_service.h" +#include "share/ob_ddl_sim_point.h" #include "observer/ob_server_struct.h" using namespace oceanbase::common; @@ -104,6 +105,8 @@ int ObDDLChecksumOperator::update_checksum(const ObIArray &ch if (0 == checksum_items.count()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(checksum_items.count())); + } else if (OB_FAIL(DDL_SIM(checksum_items.at(0).tenant_id_, checksum_items.at(0).ddl_task_id_, UPDATE_DDL_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(checksum_items.at(0))); } else { for (int64_t i = 0; OB_SUCC(ret) && i < checksum_items.count(); ++i) { const ObDDLChecksumItem &item = checksum_items.at(i); @@ -143,7 +146,9 @@ int ObDDLChecksumOperator::update_checksum(const ObIArray &ch } if (OB_SUCC(ret)) { - if (OB_FAIL(sql_proxy.write(tenant_id, sql.ptr(), affected_rows))) { + if (OB_FAIL(DDL_SIM(checksum_items.at(0).tenant_id_, checksum_items.at(0).ddl_task_id_, UPDATE_DDL_CHECKSUM_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(checksum_items.at(0))); + } else if 
(OB_FAIL(sql_proxy.write(tenant_id, sql.ptr(), affected_rows))) { LOG_WARN("fail to execute sql", K(ret)); } else if (OB_UNLIKELY(affected_rows > 2 * checksum_items.count())) { ret = OB_ERR_UNEXPECTED; @@ -272,6 +277,8 @@ int ObDDLChecksumOperator::get_tablet_checksum_record( LOG_WARN("invalid argument", K(ret), K(tenant_id), K(execution_id), K(table_id), K(ddl_task_id), K(tablet_checksum_status_map.created())); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, GET_TABLET_COLUMN_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else { int64_t batch_size = 100; ObArray batch_tablet_array; @@ -295,6 +302,8 @@ int ObDDLChecksumOperator::get_tablet_checksum_record( batch_tablet_array.at(0), // first tablet_id in one batch last_tablet_id_id))) { // last tablet id in one batch LOG_WARN("fail to assign fmt", K(ret), K(tenant_id), K(execution_id), K(ddl_task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, GET_TABLET_COLUMN_CHECKSUM_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_FAIL(get_tablet_checksum_status( sql, tenant_id, batch_tablet_array, sql_proxy, tablet_checksum_status_map))) { LOG_WARN("fail to get column checksum", K(ret), K(sql)); @@ -325,6 +334,8 @@ int ObDDLChecksumOperator::get_table_column_checksum( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(execution_id), K(table_id), K(ddl_task_id), K(column_checksum_map.created())); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, GET_TABLE_COLUMN_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_FAIL(sql.assign_fmt( "SELECT column_id, checksum FROM %s " "WHERE execution_id = %ld AND tenant_id = %ld AND table_id = %ld AND ddl_task_id = %ld AND task_id %s " @@ -332,6 +343,8 @@ int ObDDLChecksumOperator::get_table_column_checksum( execution_id, ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), 
ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id), ddl_task_id, is_unique_index_checking ? "< 0" : ">= 0"))) { LOG_WARN("fail to assign fmt", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, GET_TABLE_COLUMN_CHECKSUM_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_FAIL(get_column_checksum(sql, tenant_id, column_checksum_map, sql_proxy))) { LOG_WARN("fail to get column checksum", K(ret), K(sql)); } @@ -414,6 +427,8 @@ int ObDDLChecksumOperator::delete_checksum( || OB_INVALID_ID == source_table_id || OB_INVALID_ID == dest_table_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(execution_id), K(source_table_id), K(dest_table_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, DELETE_DDL_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_INVALID_INDEX != tablet_task_id && OB_FAIL(remove_tablet_chksum_sql.assign_fmt("AND (task_id >> %ld) = %ld ", ObDDLChecksumItem::PX_SQC_ID_OFFSET, tablet_task_id))) { LOG_WARN("assign fmt failed", K(ret), K(tablet_task_id), K(remove_tablet_chksum_sql)); @@ -423,6 +438,8 @@ int ObDDLChecksumOperator::delete_checksum( OB_ALL_DDL_CHECKSUM_TNAME, execution_id, ddl_task_id, source_table_id, dest_table_id, static_cast(remove_tablet_chksum_sql.length()), remove_tablet_chksum_sql.ptr()))) { LOG_WARN("fail to assign fmt", K(ret), K(remove_tablet_chksum_sql)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, DELETE_DDL_CHECKSUM_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_FAIL(sql_proxy.write(tenant_id, sql.ptr(), affected_rows))) { LOG_WARN("fail to execute sql", KR(ret), K(sql)); } else if (OB_UNLIKELY(affected_rows < 0)) { diff --git a/src/share/ob_ddl_common.cpp b/src/share/ob_ddl_common.cpp index 7710036626..bb64bcd3d0 100644 --- a/src/share/ob_ddl_common.cpp +++ b/src/share/ob_ddl_common.cpp @@ -22,6 +22,7 @@ #include 
"share/schema/ob_schema_getter_guard.h" #include "share/schema/ob_part_mgr_util.h" #include "share/location_cache/ob_location_service.h" +#include "share/ob_ddl_sim_point.h" #include "sql/engine/ob_physical_plan.h" #include "sql/engine/table/ob_table_scan_op.h" #include "storage/tablet/ob_tablet.h" @@ -36,6 +37,48 @@ using namespace oceanbase::share::schema; using namespace oceanbase::obrpc; using namespace oceanbase::sql;
+// Translate an ObDDLType value into its printable name.
+// @param ddl_type  DDL type to name.
+// @return a string literal spelling the enumerator's identifier for every type
+//         listed below, or "UNKNOWN_DDL_TYPE" for any other value; never NULL.
+const char *oceanbase::share::get_ddl_type(ObDDLType ddl_type)
+{
+  switch (ddl_type) {
+    case ObDDLType::DDL_INVALID: return "DDL_INVALID";
+    case ObDDLType::DDL_CHECK_CONSTRAINT: return "DDL_CHECK_CONSTRAINT";
+    case ObDDLType::DDL_FOREIGN_KEY_CONSTRAINT: return "DDL_FOREIGN_KEY_CONSTRAINT";
+    case ObDDLType::DDL_ADD_NOT_NULL_COLUMN: return "DDL_ADD_NOT_NULL_COLUMN";
+    case ObDDLType::DDL_MODIFY_AUTO_INCREMENT: return "DDL_MODIFY_AUTO_INCREMENT";
+    case ObDDLType::DDL_CREATE_INDEX: return "DDL_CREATE_INDEX";
+    case ObDDLType::DDL_DROP_INDEX: return "DDL_DROP_INDEX";
+    case ObDDLType::DDL_DROP_SCHEMA_AVOID_CONCURRENT_TRANS: return "DDL_DROP_SCHEMA_AVOID_CONCURRENT_TRANS";
+    case ObDDLType::DDL_DROP_DATABASE: return "DDL_DROP_DATABASE";
+    case ObDDLType::DDL_DROP_TABLE: return "DDL_DROP_TABLE";
+    case ObDDLType::DDL_TRUNCATE_TABLE: return "DDL_TRUNCATE_TABLE";
+    case ObDDLType::DDL_DROP_PARTITION: return "DDL_DROP_PARTITION";
+    case ObDDLType::DDL_DROP_SUB_PARTITION: return "DDL_DROP_SUB_PARTITION";
+    case ObDDLType::DDL_TRUNCATE_PARTITION: return "DDL_TRUNCATE_PARTITION";
+    case ObDDLType::DDL_TRUNCATE_SUB_PARTITION: return "DDL_TRUNCATE_SUB_PARTITION";
+    case ObDDLType::DDL_DOUBLE_TABLE_OFFLINE: return "DDL_DOUBLE_TABLE_OFFLINE";
+    case ObDDLType::DDL_MODIFY_COLUMN: return "DDL_MODIFY_COLUMN";
+    case ObDDLType::DDL_ADD_PRIMARY_KEY: return "DDL_ADD_PRIMARY_KEY";
+    case ObDDLType::DDL_DROP_PRIMARY_KEY: return "DDL_DROP_PRIMARY_KEY";
+    case ObDDLType::DDL_ALTER_PRIMARY_KEY: return "DDL_ALTER_PRIMARY_KEY";
+    case ObDDLType::DDL_ALTER_PARTITION_BY: return "DDL_ALTER_PARTITION_BY";
+    case ObDDLType::DDL_DROP_COLUMN: return "DDL_DROP_COLUMN";
+    case ObDDLType::DDL_CONVERT_TO_CHARACTER: return "DDL_CONVERT_TO_CHARACTER";
+    case ObDDLType::DDL_ADD_COLUMN_OFFLINE: return "DDL_ADD_COLUMN_OFFLINE";
+    case ObDDLType::DDL_COLUMN_REDEFINITION: return "DDL_COLUMN_REDEFINITION";
+    case ObDDLType::DDL_TABLE_REDEFINITION: return "DDL_TABLE_REDEFINITION";
+    case ObDDLType::DDL_DIRECT_LOAD: return "DDL_DIRECT_LOAD";
+    case ObDDLType::DDL_DIRECT_LOAD_INSERT: return "DDL_DIRECT_LOAD_INSERT";
+    case ObDDLType::DDL_NORMAL_TYPE: return "DDL_NORMAL_TYPE";
+    case ObDDLType::DDL_ADD_COLUMN_ONLINE: return "DDL_ADD_COLUMN_ONLINE";
+    case ObDDLType::DDL_CHANGE_COLUMN_NAME: return "DDL_CHANGE_COLUMN_NAME";
+    default: return "UNKNOWN_DDL_TYPE";
+  }
+}
+
 int ObColumnNameMap::init(const ObTableSchema &orig_table_schema, const AlterTableSchema &alter_table_schema) { @@ -560,6 +664,8 @@ int ObDDLUtil::generate_build_replica_sql( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(data_table_id), K(dest_table_id), K(schema_version), K(snapshot_version), K(execution_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, GENERATE_BUILD_REPLICA_SQL))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_schema_guard( tenant_id, schema_guard, schema_version))) { LOG_WARN("fail to get tenant 
schema guard", K(ret), K(data_table_id)); @@ -1151,6 +1257,8 @@ int ObDDLUtil::get_data_format_version( || nullptr == GCTX.sql_proxy_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arg", K(ret), K(tenant_id), K(task_id), KP(GCTX.sql_proxy_)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, GET_DATA_FORMAT_VERISON_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else { SMART_VAR(ObMySQLProxy::MySQLResult, res) { ObSqlString query_string; @@ -1874,6 +1982,8 @@ int ObCheckTabletDataComplementOp::check_tablet_checksum_update_status( ret = OB_INVALID_ARGUMENT; LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(index_table_id), K(tablet_ids), K(execution_id), K(ddl_task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, CHECK_TABLET_CHECKSUM_STATUS_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(ddl_task_id)); } else if (OB_FAIL(tablet_checksum_status_map.create(tablet_count, ObModIds::OB_SSTABLE_CREATE_INDEX))) { LOG_WARN("fail to create column checksum map", K(ret)); } else if (OB_FAIL(ObDDLChecksumOperator::get_tablet_checksum_record( @@ -1968,7 +2078,11 @@ int ObCheckTabletDataComplementOp::check_finish_report_checksum( int ret = OB_SUCCESS; bool is_checksums_all_report = false; ObArray dest_tablet_ids; - +#ifdef ERRSIM + if (GCONF.errsim_ddl_major_delay_time.get() > 0) { + return OB_SUCCESS; + } +#endif if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == index_table_id || ddl_task_id == OB_INVALID_ID || execution_id < 0)) { ret = OB_INVALID_ARGUMENT; @@ -2010,6 +2124,8 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task( if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, CHECK_OLD_COMPLEMENT_TASK_FAILED))) { + LOG_WARN("ddl sim failure: check old complement task 
failed", K(ret), K(tenant_id), K(ddl_task_id)); } else { if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, ddl_task_id, scn, is_old_task_session_exist))) { LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr)); @@ -2049,3 +2165,49 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task( } return ret; }
+
+// Default event: records the calling thread's current trace id as both parent and
+// own trace id; a sub id of 0 leaves the thread's trace id unmodified.
+ObDDLEventInfo::ObDDLEventInfo()
+  : ObDDLEventInfo(0)
+{
+}
+
+// Sub-task event: records the thread's trace id as the parent, then stamps sub_id
+// into the thread's trace id (when non-zero) and records the result as own trace id.
+ObDDLEventInfo::ObDDLEventInfo(const int32_t sub_id)
+  : addr_(GCTX.self_addr()),
+    sub_id_(sub_id),
+    event_ts_(ObTimeUtility::fast_current_time())
+{
+  init_sub_trace_id(sub_id_);
+}
+
+// Fill parent_trace_id_ and trace_id_ from the thread's current trace id.
+// @param sub_id  sub-task id to stamp into the thread's trace id; 0 means "do not modify".
+void ObDDLEventInfo::init_sub_trace_id(const int32_t sub_id)
+{
+  // ObCurTraceId::set_sub_id() treats a NULL trace id as possible, so never
+  // dereference get_trace_id() unchecked; on NULL the members keep their defaults.
+  const ObCurTraceId::TraceId *parent_trace_id = ObCurTraceId::get_trace_id();
+  if (NULL != parent_trace_id) {
+    parent_trace_id_ = *parent_trace_id;
+  }
+  if (0 != sub_id) {
+    ObCurTraceId::set_sub_id(sub_id);
+  }
+  const ObCurTraceId::TraceId *cur_trace_id = ObCurTraceId::get_trace_id();
+  if (NULL != cur_trace_id) {
+    trace_id_ = *cur_trace_id;
+  }
+}
+
+// Member-wise copy of another event; does not touch the thread's trace id.
+void ObDDLEventInfo::copy_event(const ObDDLEventInfo &other)
+{
+  addr_ = other.addr_;
+  sub_id_ = other.sub_id_;
+  parent_trace_id_ = other.parent_trace_id_;
+  trace_id_ = other.trace_id_;
+  event_ts_ = other.event_ts_;
+}
diff --git a/src/share/ob_ddl_common.h b/src/share/ob_ddl_common.h index 4864c53bc5..1895a17ade 100644 --- a/src/share/ob_ddl_common.h +++ b/src/share/ob_ddl_common.h @@ -95,6 +95,7 @@ enum ObDDLType ///< @note add new normal ddl type before this line DDL_MAX }; +const char *get_ddl_type(ObDDLType ddl_type); enum ObDDLTaskType { @@ -598,6 +599,36 @@ private: };
+typedef common::ObCurTraceId::TraceId DDLTraceId;
+// Records where and when a DDL event was created: the local server address, a
+// timestamp, and the creating thread's trace id before (parent) and after the
+// optional sub id was stamped into it.
+class ObDDLEventInfo final
+{
+public:
+  // Equivalent to ObDDLEventInfo(0): the thread's trace id is captured unmodified.
+  ObDDLEventInfo();
+  // Stamps sub_id into the thread's trace id when it is non-zero, then captures it.
+  ObDDLEventInfo(const int32_t sub_id);
+  ~ObDDLEventInfo() = default;
+  // NOTE(review): record_in_guard() and reset() have no definition in the visible
+  // ob_ddl_common.cpp hunk — confirm they are defined elsewhere.
+  void record_in_guard();
+  void copy_event(const ObDDLEventInfo &other);
+  void init_sub_trace_id(const int32_t sub_id);
+  const DDLTraceId &get_trace_id() const { return trace_id_; }
+  const DDLTraceId &get_parent_trace_id() const { return parent_trace_id_; }
+  int set_trace_id(const DDLTraceId &trace_id) { return trace_id_.set(trace_id.get()); }
+  void reset();
+  TO_STRING_KV(K(addr_), K(event_ts_), K(sub_id_), K(trace_id_), K(parent_trace_id_));
+
+public:
+  ObAddr addr_;                 // GCTX.self_addr() at construction
+  int32_t sub_id_;              // stored as passed in; the trace id keeps only its low 12 bits
+  int64_t event_ts_;            // ObTimeUtility::fast_current_time() at construction
+  DDLTraceId parent_trace_id_;  // thread's trace id before sub_id was applied
+  DDLTraceId trace_id_;         // thread's trace id after sub_id was applied
+};
 } // end namespace share diff --git a/src/share/ob_ddl_error_message_table_operator.cpp b/src/share/ob_ddl_error_message_table_operator.cpp index 8b6379259e..677a9637d9 100644 --- a/src/share/ob_ddl_error_message_table_operator.cpp +++ b/src/share/ob_ddl_error_message_table_operator.cpp @@ -16,6 +16,7 @@ #include "share/inner_table/ob_inner_table_schema.h" #include "share/ob_get_compat_mode.h" #include "share/schema/ob_schema_utils.h" +#include "share/ob_ddl_sim_point.h" using namespace oceanbase::share; using namespace oceanbase::share::schema; @@ -72,8 +73,8 @@ ObDDLErrorMessageTableOperator::~ObDDLErrorMessageTableOperator() } // to get task id for rebuild unique index task, which is a child task for offline DDL, like drop column. 
-int ObDDLErrorMessageTableOperator::get_index_task_id( - ObMySQLProxy &sql_proxy, const share::schema::ObTableSchema &index_schema, int64_t &task_id) +int ObDDLErrorMessageTableOperator::get_index_task_info( + ObMySQLProxy &sql_proxy, const share::schema::ObTableSchema &index_schema, ObDDLErrorInfo &info) { int ret = OB_SUCCESS; ObSqlString sql_string; @@ -82,7 +83,7 @@ int ObDDLErrorMessageTableOperator::get_index_task_id( const uint64_t target_object_id = index_schema.get_table_id(); SMART_VAR(ObMySQLProxy::MySQLResult, res) { sqlclient::ObMySQLResult *result = NULL; - if (OB_FAIL(sql_string.assign_fmt("SELECT task_id FROM %s WHERE tenant_id = %lu AND target_object_id = %lu", + if (OB_FAIL(sql_string.assign_fmt("SELECT * FROM %s WHERE tenant_id = %lu AND target_object_id = %lu", OB_ALL_DDL_TASK_STATUS_TNAME, ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), target_object_id))) { LOG_WARN("assign sql string failed", K(ret), K(exec_tenant_id), K(target_object_id)); } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql_string.ptr()))) { @@ -93,7 +94,9 @@ int ObDDLErrorMessageTableOperator::get_index_task_id( } else if (OB_FAIL(result->next())) { LOG_WARN("fail to get next row", K(ret)); } else { - EXTRACT_INT_FIELD_MYSQL(*result, "task_id", task_id, int64_t); + EXTRACT_INT_FIELD_MYSQL(*result, "task_id", info.task_id_, int64_t); + EXTRACT_INT_FIELD_MYSQL_WITH_DEFAULT_VALUE(*result, "parent_task_id", info.parent_task_id_, int64_t, true/*skip_null_error*/, true/*skip_column_error*/, 0); + EXTRACT_STRBUF_FIELD_TO_CLASS_MYSQL_WITH_DEFAULT_VALUE(*result, trace_id, info, OB_MAX_TRACE_ID_BUFFER_SIZE, true/*skip_null_error*/, true/*skip_column_error*/, "NULL"); } } return ret; @@ -164,6 +167,10 @@ int ObDDLErrorMessageTableOperator::load_ddl_user_error(const uint64_t tenant_id ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), task_id, ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id)))) { LOG_WARN("fail to assign sql", K(ret)); + 
} else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_LOAD_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql.ptr()))) { LOG_WARN("fail to execute sql", K(ret), K(sql)); } else if (OB_ISNULL(result = res.get_result())) { @@ -239,6 +246,10 @@ int ObDDLErrorMessageTableOperator::get_ddl_error_message( } } if (OB_FAIL(ret)) { + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_LOAD_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql.ptr()))) { LOG_WARN("fail to execute sql", K(ret), K(sql)); } else if (OB_ISNULL(result = res.get_result())) { @@ -277,10 +288,31 @@ int ObDDLErrorMessageTableOperator::get_ddl_error_message( return ret; }
+// Overload taking the trace id as an object: renders it into a local buffer and
+// forwards every argument to the `const char *trace_id` overload.
+// @return OB_ERR_UNEXPECTED when trace_id.to_string() returns a negative value,
+//         otherwise the forwarded call's return code.
+int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErrorMessage &error_message,
+    const uint64_t tenant_id, const ObCurTraceId::TraceId &trace_id, const int64_t task_id, const int64_t parent_task_id,
+    const uint64_t table_id, const int64_t schema_version, const int64_t object_id, const ObAddr &addr, ObMySQLProxy &sql_proxy)
+{
+  int ret = OB_SUCCESS;
+  char trace_id_buf[OB_MAX_TRACE_ID_BUFFER_SIZE] = { 0 };
+  const int64_t printed_len = trace_id.to_string(trace_id_buf, sizeof(trace_id_buf));
+  if (OB_UNLIKELY(printed_len < 0)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get trace id string failed", K(ret), K(trace_id));
+  } else if (OB_FAIL(report_ddl_error_message(error_message, tenant_id, trace_id_buf, task_id, parent_task_id,
+                                              table_id, schema_version, object_id, addr, sql_proxy))) {
+    LOG_WARN("fail to report ddl error message", K(ret), K(tenant_id), K(table_id));
+  }
+  return ret;
+}
+
 //report the status of building index into __all_ddl_error_message int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, - const uint64_t tenant_id, const int64_t task_id, const uint64_t table_id, const int64_t schema_version, - const int64_t object_id, const ObAddr &addr, ObMySQLProxy &sql_proxy) + const uint64_t tenant_id, const char *trace_id, const int64_t task_id, const int64_t parent_task_id, + const uint64_t table_id, const int64_t schema_version, const int64_t object_id, const ObAddr &addr, ObMySQLProxy &sql_proxy) { int ret = OB_SUCCESS; int64_t unused_user_msg_len = 0; @@ -290,6 +316,8 @@ int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErr || OB_INVALID_VERSION == schema_version || object_id < -1 || !addr.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id), K(table_id), K(schema_version), K(object_id), K(addr), K(error_message)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_REPORT_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(get_ddl_error_message(tenant_id, task_id, object_id /*target_object_id*/, addr, false /* is_ddl_retry_task */, sql_proxy, report_error_message, unused_user_msg_len))) { if (OB_ENTRY_NOT_EXIST == ret) { @@ -308,6 +336,7 @@ int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErr } if (OB_SUCC(ret) && need_report) { + uint64_t tenant_data_version = 0; const uint64_t exec_tenant_id = ObSchemaUtils::get_exec_tenant_id(tenant_id); int64_t affected_rows = 0; ObSqlString update_sql; @@ -315,23 +344,55 @@ int ObDDLErrorMessageTableOperator::report_ddl_error_message(const ObBuildDDLErr if (!addr.ip_to_string(ip, sizeof(ip))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("convert ip to string failed", K(ret), K(addr)); - } else if (OB_FAIL(update_sql.assign_fmt("INSERT INTO %s (tenant_id, 
task_id, object_id, schema_version, " - "target_object_id, svr_ip, svr_port, ret_code, ddl_type, affected_rows, user_message, dba_message) VALUES(" - "%ld, %ld, %ld, %ld, %ld, \"%s\", %d, %d, %d, %ld, \"%s\", \"%s\") ON DUPLICATE KEY UPDATE ret_code = %d, " - "ddl_type = %d, affected_rows = %ld, user_message = \"%s\", dba_message = \"%s\"", OB_ALL_DDL_ERROR_MESSAGE_TNAME, - ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id), task_id, - ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id), - schema_version, object_id, ip, addr.get_port(), error_message.ret_code_, - error_message.ddl_type_, error_message.affected_rows_, error_message.user_message_, error_message.dba_message_, - error_message.ret_code_, error_message.ddl_type_, error_message.affected_rows_, error_message.user_message_, error_message.dba_message_))) { - LOG_WARN("fail to assign fmt", K(ret), K(table_id), K(schema_version), K(object_id), K(addr), K(error_message)); - } else if (OB_FAIL(sql_proxy.write(tenant_id, update_sql.ptr(), affected_rows))) { //execute update sql - LOG_WARN("fail to write sql", KR(ret), K(update_sql)); - } else if (affected_rows > 2) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected affected rows", K(ret), K(affected_rows)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); } else { - LOG_INFO("process ddl error message report success", K(ret), K(task_id), K(schema_version), K(table_id), K(addr), K(error_message), K(update_sql.ptr())); + ObDMLSqlSplicer dml_splicer; + if (OB_FAIL(dml_splicer.add_pk_column("tenant_id", ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id)))) { + LOG_WARN("failed to add tenant_id", KR(ret), K(ObSchemaUtils::get_extract_tenant_id(exec_tenant_id, tenant_id))); + } else if (OB_FAIL(dml_splicer.add_pk_column(K(task_id)))) { + LOG_WARN("failed to add column task_id", KR(ret), K(task_id)); + } else if 
(OB_FAIL(dml_splicer.add_pk_column("object_id", ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id)))) { + LOG_WARN("failed to add column object_id", KR(ret), K(ObSchemaUtils::get_extract_schema_id(exec_tenant_id, table_id))); + } else if (OB_FAIL(dml_splicer.add_pk_column("target_object_id", object_id))) { + LOG_WARN("failed to add column object_id", KR(ret), K(object_id)); + } else if (OB_FAIL(dml_splicer.add_pk_column(K(schema_version)))) { + LOG_WARN("failed to add column schema_version", KR(ret), K(schema_version)); + } else if (OB_FAIL(dml_splicer.add_pk_column("svr_ip", ObHexEscapeSqlStr(ip)))) { + LOG_WARN("failed to add column svr_ip", KR(ret), K(ip)); + } else if (OB_FAIL(dml_splicer.add_pk_column("svr_port", addr.get_port()))) { + LOG_WARN("failed to add column svr_port", KR(ret), K(addr.get_port())); + } else if (OB_FAIL(dml_splicer.add_column("ret_code", error_message.ret_code_))) { + LOG_WARN("failed to add column ret_code", KR(ret), K(error_message.ret_code_)); + } else if (OB_FAIL(dml_splicer.add_column("ddl_type", error_message.ddl_type_))) { + LOG_WARN("failed to add column ddl_type", KR(ret), K(error_message.ddl_type_)); + } else if (OB_FAIL(dml_splicer.add_column("affected_rows", error_message.affected_rows_))) { + LOG_WARN("failed to add column affected_rows", KR(ret), K(error_message.affected_rows_)); + } else if (OB_FAIL(dml_splicer.add_column("user_message", ObHexEscapeSqlStr(error_message.user_message_)))) { + LOG_WARN("failed to add column user_message", KR(ret), K(error_message.user_message_)); + } else if (OB_FAIL(dml_splicer.add_column("dba_message", ObHexEscapeSqlStr(error_message.dba_message_)))) { + LOG_WARN("failed to add column dba_message", KR(ret), K(error_message.dba_message_)); + } else if (tenant_data_version >= DATA_VERSION_4_2_2_0) { + if (OB_FAIL(dml_splicer.add_column("trace_id",ObHexEscapeSqlStr(trace_id)))) { + LOG_WARN("failed to add column trace_id", KR(ret), K(trace_id)); + } else if 
(OB_FAIL(dml_splicer.add_column(K(parent_task_id)))) { + LOG_WARN("failed to add column parent_task_id", KR(ret), K(parent_task_id)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(dml_splicer.splice_insert_update_sql(OB_ALL_DDL_ERROR_MESSAGE_TNAME, update_sql))) { + LOG_WARN("failed to generate insertion sql", KR(ret), K(tenant_id), K(update_sql)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_ERR_MESSAGE_OPERATOR_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); + } else if (OB_FAIL(sql_proxy.write(tenant_id, update_sql.ptr(), affected_rows))) { //execute update sql + LOG_WARN("fail to write sql", KR(ret), K(update_sql)); + } else if (affected_rows > 2) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected affected rows", K(ret), K(affected_rows)); + } else { + LOG_INFO("process ddl error message report success", K(ret), K(task_id), K(schema_version), K(table_id), K(addr), K(error_message), K(update_sql.ptr())); + } + } } } return ret; @@ -403,8 +464,8 @@ int ObDDLErrorMessageTableOperator::build_ddl_error_message( } int ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(const int ret_code, - const ObTableSchema &index_schema, const int64_t task_id, const int64_t object_id, const ObAddr &addr, - ObMySQLProxy &sql_proxy, const char *index_key, int &report_ret_code) + const ObTableSchema &index_schema, const char *trace_id, const int64_t task_id, const int64_t parent_task_id, + const int64_t object_id, const ObAddr &addr, ObMySQLProxy &sql_proxy, const char *index_key, int &report_ret_code) { int ret = OB_SUCCESS; ObBuildDDLErrorMessage error_message; @@ -425,9 +486,9 @@ int ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(const int r index_table_id, DDL_CREATE_INDEX, index_key, report_ret_code))) { LOG_WARN("build ddl error message failed", K(ret), K(data_table_id), K(index_name)); } else if (OB_FAIL(report_ddl_error_message(error_message, //report into __all_ddl_error_message - tenant_id, task_id, 
data_table_id, schema_version, object_id, addr, sql_proxy))) { + tenant_id, trace_id, task_id, parent_task_id, data_table_id, schema_version, object_id, addr, sql_proxy))) { LOG_WARN("fail to report ddl error message", K(ret), K(tenant_id), K(data_table_id), - K(schema_version), K(object_id), K(addr), K(index_table_id)); + K(schema_version), K(object_id), K(addr), K(index_table_id), K(trace_id)); } return ret; } diff --git a/src/share/ob_ddl_error_message_table_operator.h b/src/share/ob_ddl_error_message_table_operator.h index a31bce0d54..be3b91f3e8 100644 --- a/src/share/ob_ddl_error_message_table_operator.h +++ b/src/share/ob_ddl_error_message_table_operator.h @@ -50,9 +50,43 @@ public: common::ObArenaAllocator allocator_; }; + //for add_column in ddl_error_message + struct ObDDLErrorInfo final + { + public: + ObDDLErrorInfo() + : parent_task_id_(0), task_id_(0), trace_id_() + { + memset(trace_id_str_, 0, sizeof(trace_id_str_)); + } + ~ObDDLErrorInfo() = default; + int set_parent_task_id(const int64_t parent_task_id) + { + parent_task_id_ = parent_task_id; + return common::OB_SUCCESS; + } + int set_task_id(const int64_t task_id) + { + task_id_ = task_id; + return common::OB_SUCCESS; + } + int set_trace_id(const ObString &trace_id) + { + common::ObDataBuffer allocator(trace_id_str_, OB_MAX_TRACE_ID_BUFFER_SIZE); + return common::ob_write_string(allocator, trace_id, trace_id_); + } + + TO_STRING_KV(K(task_id_), K(parent_task_id_), K(trace_id_str_), K(trace_id_)); + public: + int64_t parent_task_id_; + int64_t task_id_; + common::ObString trace_id_; + char trace_id_str_[OB_MAX_TRACE_ID_BUFFER_SIZE]; + }; + ObDDLErrorMessageTableOperator(); virtual ~ObDDLErrorMessageTableOperator(); - static int get_index_task_id(ObMySQLProxy &sql_proxy, const share::schema::ObTableSchema &index_schema, int64_t &task_id); + static int get_index_task_info(ObMySQLProxy &sql_proxy, const share::schema::ObTableSchema &index_schema, ObDDLErrorInfo &info); static int 
extract_index_key(const share::schema::ObTableSchema &index_schema, const common::ObStoreRowkey &index_key, char *buffer, const int64_t buffer_len); static int load_ddl_user_error(const uint64_t tenant_id, const int64_t task_id, const uint64_t table_id, @@ -61,14 +95,17 @@ public: const common::ObAddr &addr, const bool is_ddl_retry_task, common::ObMySQLProxy &sql_proxy, ObBuildDDLErrorMessage &error_message, int64_t &forward_user_msg_len); static int report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, - const int64_t task_id, const uint64_t table_id, const int64_t schema_version, const int64_t object_id, - const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); + const char *trace_id, const int64_t task_id, const int64_t parent_task_id, const uint64_t table_id, + const int64_t schema_version, const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); + static int report_ddl_error_message(const ObBuildDDLErrorMessage &error_message, const uint64_t tenant_id, + const ObCurTraceId::TraceId &trace_id, const int64_t task_id, const int64_t parent_task_id, const uint64_t table_id, + const int64_t schema_version, const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy); static int build_ddl_error_message(const int ret_code, const uint64_t tenant_id, const uint64_t table_id, ObBuildDDLErrorMessage &error_message, const common::ObString index_name, const uint64_t index_id, const ObDDLType ddl_type, const char *message, int &report_ret_code); static int generate_index_ddl_error_message(const int ret_code, const share::schema::ObTableSchema &index_schema, - const int64_t task_id, const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy &sql_proxy, - const char *index_key, int &report_ret_code); + const char *trace_id, const int64_t task_id, const int64_t parent_task_id, + const int64_t object_id, const common::ObAddr &addr, common::ObMySQLProxy 
&sql_proxy, const char *index_key, int &report_ret_code); }; } // end namespace share } // end namespace oceanbase diff --git a/src/share/ob_ddl_sim_point.cpp b/src/share/ob_ddl_sim_point.cpp new file mode 100644 index 0000000000..1f95cd9eab --- /dev/null +++ b/src/share/ob_ddl_sim_point.cpp @@ -0,0 +1,368 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SHARE +#include "lib/random/ob_random.h" +#include "share/ob_ddl_sim_point.h" +#include "share/config/ob_server_config.h" + +using namespace oceanbase::common; +using namespace oceanbase::share; + + +int64_t ObTenantDDLSimContext::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + J_OBJ_START(); + const int64_t MAX_FIXED_POINT_COUNT = 64; + ObDDLSimPointID fixed_points[MAX_FIXED_POINT_COUNT]; + int fixed_point_count = 0; + for (int64_t i = 0; i < MAX_DDL_SIM_POINT_ID; ++i) { + if (nullptr != fixed_points_ && fixed_points_[i] && i < MAX_FIXED_POINT_COUNT) { + fixed_points[fixed_point_count++] = static_cast(i); + } + } + J_KV(K(tenant_id_), K(type_), K(seed_), K(trigger_percent_), + "fixed_points_", ObArrayWrap(fixed_points, fixed_point_count)); + J_OBJ_END(); + return pos; +} + +ObDDLSimPointMgr &ObDDLSimPointMgr::get_instance() +{ + static ObDDLSimPointMgr instance; + return instance; +} + +ObDDLSimPointMgr::ObDDLSimPointMgr() + : is_inited_(false), arena_("ddl_sim_pnt_mgr") +{ + memset(all_points_, 0, sizeof(all_points_)); +} + +int ObDDLSimPointMgr::init() +{ + int ret = OB_SUCCESS; + 
if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("ddl sim point mgr already inited", K(ret), K(is_inited_)); + } else if (OB_FAIL(tenant_map_.create(207, "ddl_sim_tnt_map"))) { + LOG_WARN("create tenant context map failed"); + } else if (OB_FAIL(task_sim_map_.create(199999, "ddl_sim_pnt_map"))) { + LOG_WARN("create task sim map failed", K(ret)); + } else { + // 1. remember sim action size +#define RET_ERR(args...) sizeof(ObDDLSimRetAction) +#define N_RET_ERR(max_repeat_times, args...) sizeof(ObDDLSimRetAction) +#define SLEEP_MS(args...) sizeof(ObDDLSimSleepAction) +#define N_SLEEP_MS(args...) sizeof(ObDDLSimSleepAction) +#define DDL_SIM_POINT_DEFINE(type, name, id, desc, action_size) all_points_[id].action_size_ = action_size; +#include "share/ob_ddl_sim_point_define.h" +#undef DDL_SIM_POINT_DEFINE +#undef RET_ERR +#undef N_RET_ERR +#undef SLEEP_MS +#undef N_SLEEP_MS + + // 2. constuct sim point + void *buf = nullptr; +#define RET_ERR(ret_code, args...) new (buf) ObDDLSimRetAction<1 + ARGS_NUM(args)>(1, {ret_code, ##args}) +#define N_RET_ERR(max_repeat_times, ret_code, args...) new (buf) ObDDLSimRetAction<1 + ARGS_NUM(args)>(max_repeat_times, {ret_code, ##args}) +#define SLEEP_MS(min_time, max_time...) new (buf) ObDDLSimSleepAction(1, min_time, ##max_time) +#define N_SLEEP_MS(max_repeat_times, min_time, max_time...) 
new (buf) ObDDLSimSleepAction(max_repeat_times, min_time, ##max_time) +#define DDL_SIM_POINT_DEFINE(type, name, id, desc, action) \ + if (OB_SUCC(ret)) {\ + if (OB_ISNULL(buf = ob_malloc(all_points_[id].action_size_, "ddl_sim_act"))) {\ + ret = OB_ALLOCATE_MEMORY_FAILED;\ + LOG_WARN("allocate memory for ddl sim action failed", K(ret), K(all_points_[id].action_size_));\ + } else {\ + all_points_[id] = ObDDLSimPoint(name, type, #name, desc, action);\ + }\ + } +#include "share/ob_ddl_sim_point_define.h" +#undef DDL_SIM_POINT_DEFINE +#undef RET_ERR +#undef N_RET_ERR +#undef SLEEP_MS +#undef N_SLEEP_MS + } + if (OB_SUCC(ret)) { + is_inited_ = true; + } + return ret; +} + + +class TenantContextUpdater +{ +public: + TenantContextUpdater(const ObTenantDDLSimContext &tenant_context, const ObIArray &fixed_points_array) + : new_context_(tenant_context), fixed_point_array_(fixed_points_array) {} + ~TenantContextUpdater() = default; + int operator() (hash::HashMapPair &entry) { + int ret = OB_SUCCESS; + if (new_context_.trigger_percent_ > 0 && 0 == entry.second.trigger_percent_) { + entry.second.seed_ = new_context_.seed_; + entry.second.trigger_percent_ = new_context_.trigger_percent_; + entry.second.type_ = new_context_.type_; + } + const int64_t point_map_size = sizeof(bool) * MAX_DDL_SIM_POINT_ID; + if (fixed_point_array_.count() > 0) { + if (nullptr == entry.second.fixed_points_) { + void *buf = ObDDLSimPointMgr::get_instance().get_arena_allocator().alloc(point_map_size); + if (OB_ISNULL(buf)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), K(point_map_size)); + } else { + bool *tmp_map = new bool[MAX_DDL_SIM_POINT_ID]; + memset(tmp_map, 0, point_map_size); + entry.second.fixed_points_ = tmp_map; + } + } + } + if (nullptr != entry.second.fixed_points_) { + memset(entry.second.fixed_points_, 0, point_map_size); + } + for (int64_t i = 0; OB_SUCC(ret) && i < fixed_point_array_.count(); ++i) { + const int64_t point_id = 
fixed_point_array_.at(i); + entry.second.fixed_points_[point_id] = true; + } + LOG_INFO("update tenant param of ddl sim point success", K(new_context_), K(fixed_point_array_), K(entry.second)); + return OB_SUCCESS; + } +public: + const ObTenantDDLSimContext &new_context_; + const ObIArray &fixed_point_array_; +}; + +int ObDDLSimPointMgr::set_tenant_param(const uint64_t tenant_id, const ObConfigIntListItem &rand_param, const ObConfigIntListItem &fixed_param) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(tenant_id)); + } else { + ObArray fixed_point_array; + ObTenantDDLSimContext tenant_context; + tenant_context.tenant_id_ = tenant_id; + // 1. fill random param if need + if (rand_param.size() >= 2) { + tenant_context.seed_ = rand_param[0]; + tenant_context.trigger_percent_ = rand_param[1]; + if (rand_param.size() >= 3) { + tenant_context.type_ = static_cast(rand_param[2]); + } + } + // 2. try push tenant context into tenant map + if (OB_FAIL(tenant_map_.set_refactored(tenant_id, tenant_context))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("set tenant context failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + // 3. fill fixed param if need + for (int i = 0; OB_SUCC(ret) && i < fixed_param.size(); ++i) { + const int64_t point_id = fixed_param[i]; + if (point_id <= MIN_DDL_SIM_POINT_ID || point_id >= MAX_DDL_SIM_POINT_ID) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(point_id)); + ret = OB_SUCCESS; // ignore invalid point id + } else if (!all_points_[point_id].is_valid()) { + // do nothing + } else if (OB_FAIL(fixed_point_array.push_back(point_id))) { + LOG_WARN("push back fixed point failed", K(ret), K(point_id), K(i)); + } + } + // 4. 
update tenant context + if (OB_SUCC(ret)) { + TenantContextUpdater updater(tenant_context, fixed_point_array); + if (OB_FAIL(tenant_map_.atomic_refactored(tenant_id, updater))) { + LOG_WARN("update tenant context failed", K(ret), K(tenant_id), K(tenant_context)); + } + } + } + return ret; +} + +int ObDDLSimPointMgr::generate_task_sim_map(const ObTenantDDLSimContext &tenant_context, const int64_t current_task_id, const std::initializer_list &point_ids) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(tenant_context.trigger_percent_ <= 0)) { + // skip + } else { + const uint64_t tenant_id = tenant_context.tenant_id_; + const int64_t seed = tenant_context.seed_; + const int64_t trigger_percent = tenant_context.trigger_percent_; + for (std::initializer_list::iterator it = point_ids.begin(); OB_SUCC(ret) && it != point_ids.end(); ++it) { + ObDDLSimPointID point_id = *it; + const ObDDLSimPoint &cur_sim_point = all_points_[point_id]; + if (cur_sim_point.is_valid() && (SIM_TYPE_ALL == tenant_context.type_ || tenant_context.type_ == cur_sim_point.type_)) { + srand(static_cast((tenant_id + seed) * current_task_id * point_id)); + if (rand() % 100 < trigger_percent) { + if (OB_FAIL(task_sim_map_.set_refactored(TaskSimPoint(tenant_id, current_task_id, point_id), 0))) { + if (OB_HASH_EXIST != ret) { + LOG_WARN("set task sim point into map failed", K(ret), K(tenant_id), K(current_task_id), K(point_id)); + } else { + ret = OB_SUCCESS; + } + } + } + } + } + } + return ret; +} + +class SimCountUpdater +{ +public: + explicit SimCountUpdater(int64_t step) : step_(step), old_trigger_count_(0) {} + ~SimCountUpdater() = default; + int operator() (hash::HashMapPair &entry) { + old_trigger_count_ = entry.second; + entry.second += step_; + return OB_SUCCESS; + } +public: + int64_t step_; + int64_t old_trigger_count_; +}; + +int ObDDLSimPointMgr::try_sim(const uint64_t tenant_id, const 
uint64_t task_id, const std::initializer_list &point_ids) +{ + int ret = OB_SUCCESS; + ObTenantDDLSimContext tenant_context; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || task_id < 0)) { + LOG_INFO("invalid argument for ddl errsim, ignore", K(tenant_id), K(task_id)); + } else if (0 == task_id) { + // task_id maybe not set, skip + } else if (OB_FAIL(tenant_map_.get_refactored(tenant_id, tenant_context))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("get tenant context failed", K(ret), K(tenant_id)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_FAIL(generate_task_sim_map(tenant_context, task_id, point_ids))) { + LOG_WARN("generate task sim map failed", K(ret), K(tenant_context), K(task_id)); + } else { + for (std::initializer_list::iterator it = point_ids.begin(); OB_SUCC(ret) && it != point_ids.end(); ++it) { + ObDDLSimPointID point_id = *it; + bool need_execute = false; + TaskSimPoint sim_key(tenant_id, task_id, point_id); + if (point_id <= MIN_DDL_SIM_POINT_ID || point_id > MAX_DDL_SIM_POINT_ID || !all_points_[point_id].is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid point id", K(ret), K(point_id)); + } else if (nullptr != tenant_context.fixed_points_ && tenant_context.fixed_points_[point_id]) { + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(task_sim_map_.set_refactored(TaskSimPoint(tenant_id, task_id, point_id), 0))) { + if (OB_HASH_EXIST != tmp_ret) { + LOG_WARN("set fixed point into task sim map failed", K(tmp_ret), K(tenant_id), K(task_id), K(point_id)); + } + } + } + if (OB_SUCC(ret)) { + SimCountUpdater inc(1); + if (OB_FAIL(task_sim_map_.atomic_refactored(sim_key, inc))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("check task need sim ddl point failed", K(ret), K(sim_key)); + } else { + ret = OB_SUCCESS; + } + } else { + need_execute = inc.old_trigger_count_ < all_points_[sim_key.point_id_].action_->max_repeat_times_; + 
if (need_execute) { + ret = all_points_[point_id].action_->execute(); + LOG_INFO("ddl sim point executed", K(ret), K(sim_key)); + } else { + SimCountUpdater dec(-1); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(task_sim_map_.atomic_refactored(sim_key, dec))) { + LOG_WARN("decrease trigger count failed", K(tmp_ret), K(sim_key)); + } + } + } + } + } + } + return ret; +} + +int ObDDLSimPointMgr::get_sim_point(const int64_t idx, ObDDLSimPoint &sim_point) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(idx < MIN_DDL_SIM_POINT_ID || idx >= MAX_DDL_SIM_POINT_ID)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(idx)); + } else { + sim_point = all_points_[idx]; + } + return ret; +} + +class SimCountCollector +{ +public: + SimCountCollector(ObIArray &task_sim_points, ObIArray &sim_counts) + : task_sim_points_(task_sim_points), sim_counts_(sim_counts) {} + ~SimCountCollector() = default; + int operator() (hash::HashMapPair &entry) { + int ret = OB_SUCCESS; + if (OB_FAIL(task_sim_points_.push_back(entry.first))) { + LOG_WARN("push back task sim point failed", K(ret), K(entry.first)); + } else if (OB_FAIL(sim_counts_.push_back(entry.second))) { + LOG_WARN("push back sim count failed", K(ret), K(entry.second)); + } + return ret; + } +public: + ObIArray &task_sim_points_; + ObIArray &sim_counts_; +}; + +int ObDDLSimPointMgr::get_sim_stat(ObIArray &task_sim_points, ObIArray &sim_counts) +{ + int ret = OB_SUCCESS; + task_sim_points.reset(); + sim_counts.reset(); + int64_t entry_count = 0; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", K(ret), K(is_inited_)); + } else { + entry_count = task_sim_map_.size(); + } + if (OB_SUCC(ret) && entry_count > 0) { + const int64_t reserve_count = entry_count + 1000L; + if (OB_FAIL(task_sim_points.reserve(reserve_count))) { + LOG_WARN("reserve array capacity failed", K(ret), 
K(entry_count), K(reserve_count)); + } else if (OB_FAIL(sim_counts.reserve(reserve_count))) { + LOG_WARN("reserve array capacity failed", K(ret), K(entry_count), K(reserve_count)); + } + } + if (OB_SUCC(ret) && entry_count > 0) { + SimCountCollector stat_collector(task_sim_points, sim_counts); + if (OB_FAIL(task_sim_map_.foreach_refactored(stat_collector))) { + LOG_WARN("collect ddl sim entry failed", K(ret)); + } + } + return ret; +} diff --git a/src/share/ob_ddl_sim_point.h b/src/share/ob_ddl_sim_point.h new file mode 100644 index 0000000000..bbfee35356 --- /dev/null +++ b/src/share/ob_ddl_sim_point.h @@ -0,0 +1,187 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OCEANBASE_SHARE_OB_DDL_SIM_POINT_H +#define OCEANBASE_SHARE_OB_DDL_SIM_POINT_H + +#include +#include "lib/container/ob_array.h" +#include "lib/hash/ob_hashmap.h" +#include "lib/hash/ob_hashset.h" +#include "lib/lock/ob_drw_lock.h" +#include "share/config/ob_config.h" + +namespace oceanbase +{ +namespace share +{ + +enum ObSimType +{ + SIM_TYPE_ALL = 0, + SIM_TYPE_DDL = 1, + SIM_TYPE_TRANSFER = 2, +}; + +struct ObDDLSimAction +{ +public: + explicit ObDDLSimAction(const int64_t max_repeat_times) : max_repeat_times_(max_repeat_times) {} + virtual int execute() = 0; + DECLARE_PURE_VIRTUAL_TO_STRING; +public: + int64_t max_repeat_times_; +}; + +template +struct ObDDLSimRetAction : public ObDDLSimAction +{ +public: + ObDDLSimRetAction(const int64_t max_repeat_times, const std::initializer_list &ret_codes) + : ObDDLSimAction(max_repeat_times) { + std::initializer_list::iterator it = ret_codes.begin(); + for (int64_t i = 0; i < COUNT && it != ret_codes.end(); ++i, ++it) { + ret_codes_[i] = *it; + } + } + virtual int execute() override { + int64_t i = ObRandom::rand(0, COUNT - 1); + return ret_codes_[i]; + } + VIRTUAL_TO_STRING_KV("repeat_times", max_repeat_times_, "ret_codes", ObArrayWrap(ret_codes_, COUNT)); +public: + int ret_codes_[COUNT]; +}; + +struct ObDDLSimSleepAction : public ObDDLSimAction +{ +public: + ObDDLSimSleepAction(const int64_t max_repeat_times, const int min_sleep_ms, const int max_sleep_ms = 0) + : ObDDLSimAction(max_repeat_times), min_sleep_ms_(min_sleep_ms), max_sleep_ms_(max(min_sleep_ms, max_sleep_ms)) {} + virtual int execute() override { + int64_t sleep_ms = ObRandom::rand(min_sleep_ms_, max_sleep_ms_); + ob_usleep(static_cast(sleep_ms * 1000L)); + return OB_SUCCESS; + } + VIRTUAL_TO_STRING_KV("repeat_times", max_repeat_times_, K_(min_sleep_ms), K_(max_sleep_ms)); +public: + int min_sleep_ms_; + int max_sleep_ms_; +}; + +enum ObDDLSimPointID // check unique id by compiler +{ + MIN_DDL_SIM_POINT_ID = 0, +#define 
DDL_SIM_POINT_DEFINE(type, name, id, desc, action) name = id, +#include "share/ob_ddl_sim_point_define.h" + MAX_DDL_SIM_POINT_ID +}; +#undef DDL_SIM_POINT_DEFINE + +struct ObDDLSimPoint +{ +public: + ObDDLSimPoint() + : id_(MIN_DDL_SIM_POINT_ID), type_(SIM_TYPE_ALL), name_(nullptr), desc_(nullptr), action_(nullptr) {} + ObDDLSimPoint(const ObDDLSimPointID id, const ObSimType type, const char *name, const char *desc, ObDDLSimAction *action) + : id_(id), type_(type), name_(name), desc_(desc), action_(action) {} + bool is_valid() const { return id_ > MIN_DDL_SIM_POINT_ID && id_ < MAX_DDL_SIM_POINT_ID && nullptr != name_ && nullptr != desc_ && nullptr != action_; } + TO_STRING_KV(K(id_), K_(type), K(name_), K(desc_), KP(action_), K(action_size_)); + +public: + ObDDLSimPointID id_; + ObSimType type_; + const char *name_; + const char *desc_; + union { + ObDDLSimAction *action_; + int64_t action_size_; + }; +}; + +struct ObTenantDDLSimContext +{ +public: + ObTenantDDLSimContext() : tenant_id_(0), type_(SIM_TYPE_ALL), seed_(0), trigger_percent_(0), fixed_points_(nullptr) {} + DECLARE_TO_STRING; +public: + uint64_t tenant_id_; + ObSimType type_; + int64_t seed_; + int64_t trigger_percent_; + bool *fixed_points_; +}; + +class ObDDLSimPointMgr +{ +public: + struct TaskSimPoint + { + public: + TaskSimPoint(const uint64_t tenant_id = 0, const uint64_t task_id = 0, const ObDDLSimPointID point_id = MIN_DDL_SIM_POINT_ID) + : tenant_id_(tenant_id), task_id_(task_id), point_id_(point_id) {} + int hash(uint64_t &hash_val) const + { + hash_val = 0; + hash_val = murmurhash(&tenant_id_, sizeof(tenant_id_), hash_val); + hash_val = murmurhash(&task_id_, sizeof(task_id_), hash_val); + hash_val = murmurhash(&point_id_, sizeof(point_id_), hash_val); + return OB_SUCCESS; + } + bool operator ==(const TaskSimPoint &other) const + { + return tenant_id_ == other.tenant_id_ && task_id_ == other.task_id_ && point_id_ == other.point_id_; + } + TO_STRING_KV(K(tenant_id_), K(task_id_), K(point_id_)); 
+ public: + uint64_t tenant_id_; + uint64_t task_id_; + ObDDLSimPointID point_id_; + }; +public: + static ObDDLSimPointMgr &get_instance(); + int init(); + int set_tenant_param(const uint64_t tenant_id, const ObConfigIntListItem &rand_param, const ObConfigIntListItem &fixed_param); + int try_sim(const uint64_t tenant_id, const uint64_t task_id, const std::initializer_list &point_ids); + int get_sim_point(const int64_t idx, ObDDLSimPoint &sim_point) const; + int get_sim_stat(ObIArray &task_sim_points, ObIArray &sim_counts); + ObIAllocator &get_arena_allocator() { return arena_; } + TO_STRING_KV(K(is_inited_), K(task_sim_map_.size())); +private: + int generate_task_sim_map(const ObTenantDDLSimContext &tenant_context, const int64_t current_task_id, const std::initializer_list &point_ids); +private: + ObDDLSimPointMgr(); + DISABLE_COPY_ASSIGN(ObDDLSimPointMgr); + +private: + bool is_inited_; + ObDDLSimPoint all_points_[MAX_DDL_SIM_POINT_ID]; + hash::ObHashMap task_sim_map_; + hash::ObHashMap tenant_map_; + ObArenaAllocator arena_; +}; + +#ifdef ERRSIM +#define DDL_SIM(tenant_id, task_id, sim_point, args...) ::oceanbase::share::ObDDLSimPointMgr::get_instance().try_sim(tenant_id, task_id, {sim_point, ##args}) +#define DDL_SIM_WHEN(condition, tenant_id, task_id, sim_point, args...) (condition) ? DDL_SIM(tenant_id, task_id, sim_point, args) : OB_SUCCESS +#else +#define DDL_SIM(...) OB_SUCCESS +#define DDL_SIM_WHEN(...) OB_SUCCESS +#endif + + + + +} // namespace share +} // namespace oceanbase + +#endif//OCEANBASE_SHARE_OB_DDL_SIM_POINT_H diff --git a/src/share/ob_ddl_sim_point_define.h b/src/share/ob_ddl_sim_point_define.h new file mode 100644 index 0000000000..423b25d977 --- /dev/null +++ b/src/share/ob_ddl_sim_point_define.h @@ -0,0 +1,161 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +// DDL_SIM_POINT_DEFINE accepts parameters like: (type, name, id, desc, action(args...)) +// +// Available actions: +// RET_ERR(ret_code, ...) // return fixed or random ret_code, execute once +// N_RET_ERR(max_repeat_times, ret_code, ...) // return fixed or random ret_code, execute at most max_repeat_times times +// SLEEP_MS(min_sleep_ms, [max_sleep_ms]) // sleep random time between min_sleep_ms and max_sleep_ms, execute once +// N_SLEEP_MS(max_repeat_times, min_sleep_ms, [max_sleep_ms]) // sleep random time between min_sleep_ms and max_sleep_ms, execute at most max_repeat_times times +// +// Examples: +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_1, 1, "return fixed ret_code once", RET_ERR(OB_TASK_EXPIRED)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_2, 2, "return random ret_code once", RET_ERR(OB_NOT_MASTER, OB_EAGAIN, OB_INVALID_ARGUMENT, OB_NOT_INIT)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_3, 3, "return fixed ret_code for 3 times", N_RET_ERR(3, OB_EAGAIN)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_4, 4, "return random ret_code for 5 times", N_RET_ERR(5, OB_NOT_MASTER, OB_EAGAIN, OB_INVALID_ARGUMENT, OB_NOT_INIT)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_5, 5, "sleep 10ms once", SLEEP_MS(10)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_6, 6, "random sleep 5-10ms once", SLEEP_MS(5, 10)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_7, 7, "sleep 10ms, execute 2 times", N_SLEEP_MS(2, 10)) +// DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SIM_POINT_EXAMPLE_8, 8, "random sleep 5-10ms, execute 3 times", N_SLEEP_MS(3, 5, 10)) +#ifdef DDL_SIM_POINT_DEFINE +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, SCHEDULE_DDL_TASK_FAILED, 1, "schedule ddl task failed, rely
on recover thread", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, INSERT_CHILD_DDL_TASK_RECORD_EXIST, 2, "insert ddl task record, but the record already exist", RET_ERR(OB_ENTRY_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, ON_COLUMN_CHECKSUM_REPLY_FAILED, 3, "receive column checksum reply, but process failed", RET_ERR(OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_COMPLETE_SSTABLE_FAILED, 4, "update data complement failed", RET_ERR(OB_ENTRY_NOT_EXIST, OB_TASK_EXPIRED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REPORT_DDL_CHECKSUM_FAILED, 5, "report ddl checksum failed", N_RET_ERR(5, OB_NOT_MASTER, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_SCHEMA_TRANS_END_SLOW, 6, "check schema trans end execute slow", N_SLEEP_MS(1000, 100, 200)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, WRITE_DUPLICATED_DDL_REDO_LOG, 7, "write duplicated ddl redo logs", RET_ERR(OB_NOT_MASTER)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, PUSH_TASK_INTO_QUEUE_FAILED, 8, "push task into queue failed", RET_ERR(OB_STATE_NOT_MATCH, OB_ENTRY_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REMOVE_TASK_FROM_QUEUE_FAILED, 9, "remove task from queue failed", RET_ERR(OB_STATE_NOT_MATCH, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_TASK_FROM_QUEUE_FAILED, 10, "get task from queue failed", RET_ERR(OB_STATE_NOT_MATCH, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CANCEL_SYS_TASK_FAILED, 11, "cancel sys task failed", RET_ERR(OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TABLE_UPDATE_TASK_INFO_FAILED, 12, "redef task update task info failed", RET_ERR(OB_EAGAIN, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TABLE_ABORT_FAILED, 13, "redef task abort failed", RET_ERR(OB_EAGAIN, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TABLE_COPY_DEPES_FAILED, 14, "redef task copy deps failed", RET_ERR(OB_EAGAIN, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TABLE_FINISH_FAILED, 15, "redef task finish failed", 
RET_ERR(OB_EAGAIN, OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, HEART_BEAT_UPDATE_ACTIVE_TIME, 16, "heart beat mgr update task active time", RET_ERR(OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SCHEDULER_STOPPED, 17, "ddl scheduler stopped", RET_ERR(OB_NOT_RUNNING)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SCHEDULER_ADD_SYS_TASK_FAILED, 18, "ddl scheduler add sys task failed", RET_ERR(OB_ENTRY_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_SCHEDULER_REMOVE_SYS_TASK_FAILED, 19, "ddl scheduler remove sys task failed", RET_ERR(OB_ENTRY_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, TASK_STATUS_OPERATOR_SLOW, 20, "ddl task status query or modify slow", N_SLEEP_MS(100, 1000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_FREEZE_INFO_FAILED, 21, "get freeze info failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_TRANS_END_FAILED, 22, "check trans end failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_TENANT_STANDBY_FAILED, 23, "check tenant standby failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REPORT_DDL_RET_CODE_FAILED, 24, "report ddl ret code failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, BATCH_RELEASE_SNAPSHOT_FAILED, 25, "release snapshot failed", N_RET_ERR(10, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, QUERY_SQL_PLAN_MONITOR_SLOW, 26, "query sql plan monitor slow", N_SLEEP_MS(100, 1000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CALC_COLUMN_CHECKSUM_RPC_SLOW, 27, "calculate column checksum rpc slow", N_SLEEP_MS(1000, 100, 200)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_MODIFY_TIME_ELAPSED_SLOW, 28, "check modify time elapsed rpc slow", N_SLEEP_MS(1000, 100, 200)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CREATE_INDEX_BUILD_SSTABLE_FAILED, 29, "create index build sstable failed", N_RET_ERR(10, OB_REPLICA_NOT_READABLE, OB_ERR_INSUFFICIENT_PX_WORKER)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, PROCESS_COLUMN_CHECKSUM_RESPONSE_SLOW, 30, "process column checksum response 
slow", N_SLEEP_MS(1000, 100, 200)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, PROCESS_BUILD_SSTABLE_RESPONSE_SLOW, 31, "process build sstable response slow", N_SLEEP_MS(1000, 100, 200)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_HOLD_SNAPSHOT_FAILED, 32, "ddl task hold snapshot failed", N_RET_ERR(5, OB_TIMEOUT, OB_SNAPSHOT_DISCARDED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_OLD_COMPLEMENT_TASK_FAILED, 33, "check old complement task failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_RELEASE_SNAPSHOT_FAILED, 34, "create index relase snapshot failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_COLLECT_LONGOPS_STAT_FAILED, 35, "collect longops stat failed", N_RET_ERR(5, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, PROCESS_CHILD_TASK_FINISH_FAILED, 36, "process child task finish failed", RET_ERR(OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_INDEX_STATUS_FAILED, 37, "update index status failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DROP_INDEX_RPC_FAILED, 38, "drop index rpc failed", RET_ERR(OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REFRESH_SCHEMA_VERSION_FAILED, 39, "refresh schema version failed", RET_ERR(OB_SCHEMA_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, SINGLE_REPLICA_EXECUTOR_BUILD_FAILED, 40, "single replica executor build failed", RET_ERR(OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, SINGLE_REPLICA_EXECUTOR_SCHEDULE_TASK_FAILED, 41, "single replica executor schedule task failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_SSTABLE_BULD_TASK_INIT_FAILED, 42, "redef sstable build task init failed", RET_ERR(OB_EAGAIN, OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_SSTABLE_BULD_TASK_PROCESS_FAILED, 43, "redef sstable build task process failed", RET_ERR(OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REAP_OLD_REPLICA_BUILD_TASK_FAILED, 44, "reap old replica build task failed", 
RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, LOCK_TABLE_FAILED, 45, "lock table failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT, OB_NOT_MASTER)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UNLOCK_TABLE_FAILED, 46, "unlock table failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT, OB_NOT_MASTER)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, BUILD_REPLICA_ASYNC_TASK_FAILED, 47, "build replica async t task failed", RET_ERR(OB_EAGAIN, OB_NOT_MASTER)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_CHECK_TABLE_EMPTY_FAILED, 48, "redef task check table empty failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_GET_CHECKSUM_COLUMNS_FAILED, 49, "redef task get checksum columns failed", RET_ERR(OB_EAGAIN)) + +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, ADD_CONSTRAINT_DDL_TASK_FAILED, 52, "add constraint ddl task failed", RET_ERR(OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, ADD_FOREIGN_KEY_DDL_TASK_FAILED, 53, "add foreign key ddl task failed", RET_ERR(OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, SYNC_AUTOINC_POSITION_FAILED, 54, "sync auto inc position failed", RET_ERR(OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, MODIFY_AUTOINC_FAILED, 55, "redef task modify auto inc failed", RET_ERR(OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_FINISH_FAILED, 56, "redef task finish failed", N_RET_ERR(10, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_CHECK_HEALTH_FAILED, 57, "redef task check health failed", N_RET_ERR(5, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_SYNC_STATS_INFO_FAILED, 58, "redef task sync stats info failed", N_RET_ERR(5, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_SYNC_TABLET_AUTOINC_SEQ_FAILED, 59, "redef task sync tablet auto inc sequence failed", RET_ERR(OB_TIMEOUT, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_CHECK_REBUILD_CONSTRAINT_FAILED, 60, "redef task check rebuild constraint failed", RET_ERR(OB_ALLOCATE_MEMORY_FAILED, 
OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_GET_ALL_TABLET_COUNT_FAILED, 61, "redef task get all tablet count failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_INIT_BY_RECORD_FAILED, 62, "ddl task init by record failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_ENCODE_MESSAGE_FAILED, 63, "ddl task encode message failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_DECODE_MESSAGE_FAILED, 64, "ddl task decode message failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, RETRY_TASK_UPDATE_BY_CHILD_FAILED, 65, "retry task update by child failed", RET_ERR(OB_STATE_NOT_MATCH, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_TASK_STATUS_FAILED, 66, "update task record on task status failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_SNAPSHOT_VERSION_FAILED, 67, "update task record on snapshot version failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_RET_CODE_FAILED, 68, "update task record on ret code failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_EXECUTION_ID_FAILED, 69, "update task record on execution id failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_MESSAGE_FAILED, 70, "update task record on message failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_TASK_RECORD_ON_STATUS_AND_MESSAGE_FAILED, 71, "update task record on status and message failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, 
DELETE_TASK_RECORD_FAILED, 72, "delete task record failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, QUERY_TASK_RECORD_CHECK_CONFLICT_DDL_FAILED, 73, "query task record check conflict ddl failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, SELECT_TASK_RECORD_FOR_UPDATE_FAILED, 74, "select task record for update failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, KILL_TASK_BY_INNER_SQL_FAILED, 75, "kill task by inner sql failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, RETRY_TASK_DROP_SCHEMA_FAILED, 76, "rety task drop schema failed", N_RET_ERR(10, OB_TIMEOUT, OB_TRY_LOCK_ROW_CONFLICT, OB_TRANSACTION_SET_VIOLATION)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, RETRY_TASK_WAIT_ALTER_TABLE_FAILED, 77, "rety task wait alter table failed", N_RET_ERR(5, OB_TIMEOUT, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, RETRY_TASK_CHECK_SCHEMA_CHANGED_FAILED, 78, "rety task check schema changed failed", N_RET_ERR(5, OB_TIMEOUT, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, RETRY_TASK_CHECK_SCHEMA_CHANGED_SLOW, 79, "rety task check schema changed slow", N_SLEEP_MS(5, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_REDEF_TASK_CHECK_COLUMN_CHECKSUM_FAILED, 80, "ddl redef task check column checksum slow", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_SEND_BUILD_REPLICA_REQUEST_FAILED, 81, "ddl task send build replica failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, TABLE_REDEF_TASK_CHECK_USE_HEAP_PLAN_FAILED, 82, "table redef task use heap plan failed", RET_ERR(OB_EAGAIN)) + +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_COPY_INDEX_FAILED, 84, "table redef task copy index failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, 
REDEF_TASK_COPY_CONSTRAINT_FAILED, 85, "table redef task copy constraint failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_COPY_FOREIGN_KEY_FAILED, 86, "table redef task copy foreign key failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, REDEF_TASK_COPY_DEPENDENT_OBJECTS_FAILED, 87, "redef task copy dependent object failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_TASK_TAKE_EFFECT_FAILED, 88, "ddl task take effect failed", N_RET_ERR(5, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, TABLE_REDEF_TASK_REPENDING_FAILED, 89, "table redef task repending failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, TABLE_REDEF_TASK_GET_DIRECT_LOAD_JOB_STAT_FAILED, 90, "table redef task get direct load job stat failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, TABLE_REDEF_TASK_GET_DIRECT_LOAD_JOB_STAT_SLOW, 91, "table redef task get direct load stat slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CONSTRAINT_TASK_SET_VALIDATED, 92, "constraint task set validated failed", N_RET_ERR(5, OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CONSTRAINT_TASK_ROLL_BACK_SCHEMA, 93, "constraint task rollback schema failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, VALIDATE_CONSTRAINT_OR_FOREIGN_KEY_TASK_FAILED, 94, "check constraint valid task failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_AUTOINC_SEQUENCE_FAILED, 95, "update auto inc sequence failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_ERR_MESSAGE_OPERATOR_SLOW, 96, "ddl error message operator slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_ERR_MESSAGE_OPERATOR_REPORT_FAILED, 97, "ddl error message operator report failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_ERR_MESSAGE_OPERATOR_LOAD_FAILED, 98, "ddl error message operator load failed", N_RET_ERR(5, 
OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_ERR_MESSAGE_OPERATOR_GENERATE_FAILED, 99, "ddl error message operator generate message failed", N_RET_ERR(5, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GENERATE_BUILD_REPLICA_SQL, 100, "generate build replica sql failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_DATA_FORMAT_VERISON_FAILED, 101, "get data format version failed", N_RET_ERR(10, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_TABLET_CHECKSUM_STATUS_FAILED, 102, "check tablet checksum status failed", N_RET_ERR(10, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CHECK_TABLET_CHECKSUM_STATUS_SLOW, 103, "check tablet checksum status slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_DDL_CHECKSUM_FAILED, 104, "update ddl checksum failed", N_RET_ERR(10, OB_TRY_LOCK_ROW_CONFLICT, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UPDATE_DDL_CHECKSUM_SLOW, 105, "update ddl checksum slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_TABLE_COLUMN_CHECKSUM_FAILED, 106, "get table column checksum failed", N_RET_ERR(10, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_TABLE_COLUMN_CHECKSUM_SLOW, 107, "get table column slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_TABLET_COLUMN_CHECKSUM_FAILED, 108, "get tablet column checksum failed", N_RET_ERR(10, OB_EAGAIN, OB_TIMEOUT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, GET_TABLET_COLUMN_CHECKSUM_SLOW, 109, "get tablet column slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DELETE_DDL_CHECKSUM_FAILED, 110, "delete ddl checksum failed", N_RET_ERR(10, OB_EAGAIN, OB_NOT_MASTER, OB_TRY_LOCK_ROW_CONFLICT)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DELETE_DDL_CHECKSUM_SLOW, 111, "delete ddl checksum slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UNIQUE_INDEX_CHECKER_SCAN_TABLE_WITH_CHECKSUM_FAILED, 112, "unique index checker scan table 
with checksum failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UNIQUE_INDEX_CHECKER_GENERATE_INDEX_OUTPUT_PARAM_FAILED, 113, "unique index checker generate index output param failed", RET_ERR(OB_ALLOCATE_MEMORY_FAILED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, UNIQUE_INDEX_CHECKER_WAIT_TRANS_END_FAILED, 114, "unique index checker wait trans end failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CREATE_HIDDEN_TABLE_RPC_FAILED , 115, "create hidden table rpc failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, CREATE_HIDDEN_TABLE_RPC_SLOW , 116, "create hidden table rpc slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, COPY_TABLE_DEPENDENTS_RPC_FAILED, 117, "copy table dependents rpc failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, COPY_TABLE_DEPENDENTS_RPC_SLOW, 118, "copy table dependents rpc slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, FINISH_REDEF_TABLE_RPC_FAILED, 119, "finish redef table rpc failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, FINISH_REDEF_TABLE_RPC_SLOW, 120, "finish redef table rpc slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, ABORT_REDEF_TABLE_RPC_FAILED, 121, "abort redef table rpc failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, ABORT_REDEF_TABLE_RPC_SLOW, 122, "abort redef table rpc slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, WAIT_REDEF_TASK_REACH_PENDING_FAILED, 123, "wait redef task reach pending failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, WAIT_REDEF_TASK_REACH_PENDING_SLOW, 124, "wait redef task reach pending slow", N_SLEEP_MS(10, 1000, 2000)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_REDO_WRITER_SPEED_CONTROL_FAILED, 125, "ddl redo log writer speed control failed", RET_ERR(OB_TASK_EXPIRED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_REDO_WRITER_WRITE_MACRO_LOG_FAILED, 126, "ddl redo writer write macro log failed", RET_ERR(OB_STATE_NOT_MATCH, 
OB_NOT_MASTER, OB_TASK_EXPIRED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_REDO_WRITER_WRITE_START_LOG_FAILED, 127, "ddl redo writer write start log failed", RET_ERR(OB_STATE_NOT_MATCH, OB_NOT_MASTER, OB_TASK_EXPIRED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_REDO_WRITER_WRITE_COMMIT_LOG_FAILED, 128, "ddl redo writer write commit log failed", RET_ERR(OB_STATE_NOT_MATCH, OB_NOT_MASTER, OB_TASK_EXPIRED)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, DDL_INSERT_SSTABLE_GET_NEXT_ROW_FAILED, 129, "ddl insert sstable get next row failed", RET_ERR(OB_REPLICA_NOT_READABLE, OB_ERR_INSUFFICIENT_PX_WORKER, OB_TABLE_NOT_EXIST)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, COMPLEMENT_DATA_TASK_SPLIT_RANGE_FAILED, 130, "complement data task split range failed", RET_ERR(OB_EAGAIN)) +DDL_SIM_POINT_DEFINE(SIM_TYPE_DDL, COMPLEMENT_DATA_TASK_LOCAL_SCAN_FAILED, 131, "complement data task local scan failed", RET_ERR(OB_EAGAIN)) +#endif diff --git a/src/share/ob_ddl_task_executor.h b/src/share/ob_ddl_task_executor.h index 8e631e8b45..fba81e9870 100644 --- a/src/share/ob_ddl_task_executor.h +++ b/src/share/ob_ddl_task_executor.h @@ -84,7 +84,7 @@ private: || common::OB_ERR_SHARED_LOCK_CONFLICT == ret_code || common::OB_ERR_WAIT_REMOTE_SCHEMA_REFRESH == ret_code || common::OB_SCHEMA_EAGAIN == ret_code || common::OB_ERR_REMOTE_SCHEMA_NOT_FULL == ret_code || common::OB_ERR_EXCLUSIVE_LOCK_CONFLICT == ret_code || common::OB_ERR_EXCLUSIVE_LOCK_CONFLICT == ret_code || common::OB_ERR_EXCLUSIVE_LOCK_CONFLICT_NOWAIT == ret_code || common::OB_TRANS_STMT_NEED_RETRY == ret_code || common::OB_SCHEMA_NOT_UPTODATE == ret_code - || common::OB_TRANSACTION_SET_VIOLATION == ret_code || common::OB_TRANS_CANNOT_SERIALIZE == ret_code || common::OB_GTI_NOT_READY == ret_code + || common::OB_TRANSACTION_SET_VIOLATION == ret_code || common::OB_TRY_LOCK_ROW_CONFLICT == ret_code || common::OB_TRANS_CANNOT_SERIALIZE == ret_code || common::OB_GTI_NOT_READY == ret_code || common::OB_TRANS_WEAK_READ_VERSION_NOT_READY == ret_code || 
common::OB_REPLICA_NOT_READABLE == ret_code || common::OB_ERR_INSUFFICIENT_PX_WORKER == ret_code || common::OB_EXCEED_MEM_LIMIT == ret_code || common::OB_INACTIVE_SQL_CLIENT == ret_code || common::OB_INACTIVE_RPC_PROXY == ret_code || common::OB_LS_OFFLINE == ret_code; } diff --git a/src/sql/engine/cmd/ob_database_executor.cpp b/src/sql/engine/cmd/ob_database_executor.cpp index f4b48ed970..159c4fdf9a 100644 --- a/src/sql/engine/cmd/ob_database_executor.cpp +++ b/src/sql/engine/cmd/ob_database_executor.cpp @@ -23,6 +23,7 @@ #include "share/ob_common_rpc_proxy.h" #include "lib/worker.h" #include "rootserver/ob_root_utils.h" +#include "observer/ob_server_event_history_table_operator.h" namespace oceanbase { @@ -45,6 +46,7 @@ int ObCreateDatabaseExecutor::execute(ObExecContext &ctx, ObCreateDatabaseStmt & const obrpc::ObCreateDatabaseArg &create_database_arg = stmt.get_create_database_arg(); obrpc::ObCreateDatabaseArg &tmp_arg = const_cast(create_database_arg); ObString first_stmt; + obrpc::UInt64 database_id(0); if (OB_FAIL(stmt.get_first_stmt(first_stmt))) { SQL_ENG_LOG(WARN, "fail to get first stmt" , K(ret)); } else { @@ -63,14 +65,22 @@ int ObCreateDatabaseExecutor::execute(ObExecContext &ctx, ObCreateDatabaseStmt & SQL_ENG_LOG(WARN, "fail to get physical plan ctx", K(ret), K(ctx), K(common_rpc_proxy)); } else { //为什么create database的协议需要返回database_id,暂时没有用上。 - obrpc::UInt64 database_id(0); if (OB_FAIL(common_rpc_proxy->create_database(create_database_arg, database_id))) { SQL_ENG_LOG(WARN, "rpc proxy create table failed", K(ret)); } else { ctx.get_physical_plan_ctx()->set_affected_rows(1); } } - SQL_ENG_LOG(INFO, "finish execute create database.", K(ret), K(stmt)); + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "create database execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "database_info", database_id, + "schema_version", 
create_database_arg.database_schema_.get_schema_version()); + } + SQL_ENG_LOG(INFO, "finish execute create database.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(create_database_arg)); return ret; } @@ -167,6 +177,16 @@ int ObAlterDatabaseExecutor::execute(ObExecContext &ctx, ObAlterDatabaseStmt &st SQL_ENG_LOG(WARN, "failed to update sys variable", K(ret)); } } + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "alter database execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "database_info", alter_database_arg.database_schema_.get_database_id(), + "schema_version", alter_database_arg.database_schema_.get_schema_version()); + } + SQL_ENG_LOG(INFO, "finish execute alter database", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(alter_database_arg)); return ret; } @@ -238,7 +258,15 @@ int ObDropDatabaseExecutor::execute(ObExecContext &ctx, ObDropDatabaseStmt &stmt ctx.get_physical_plan_ctx()->set_affected_rows(drop_database_res.affected_row_); } } - SQL_ENG_LOG(INFO, "finish execute drop database.", K(ret), K(stmt)); + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "drop database execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "database_info", database_id); + } + SQL_ENG_LOG(INFO, "finish execute drop database.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(drop_database_arg)); return ret; } @@ -268,6 +296,17 @@ int ObFlashBackDatabaseExecutor::execute(ObExecContext &ctx, ObFlashBackDatabase } else if (OB_FAIL(common_rpc_proxy->flashback_database(flashback_database_arg))) { SQL_ENG_LOG(WARN, "rpc proxy flashback database failed", K(ret)); } + + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "flashback database execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", 
*ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "origin_db_name", flashback_database_arg.origin_db_name_, + "new_db_name", flashback_database_arg.new_db_name_); + } + SQL_ENG_LOG(INFO, "finish execute flashback database.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(flashback_database_arg)); return ret; } @@ -297,6 +336,16 @@ int ObPurgeDatabaseExecutor::execute(ObExecContext &ctx, ObPurgeDatabaseStmt &st } else if (OB_FAIL(common_rpc_proxy->purge_database(purge_database_arg))) { SQL_ENG_LOG(WARN, "rpc proxy purge database failed", K(ret)); } + + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "purge database execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "database_info", purge_database_arg.db_name_); + } + SQL_ENG_LOG(INFO, "finish purge database.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(purge_database_arg)); return ret; } diff --git a/src/sql/engine/cmd/ob_ddl_executor_util.cpp b/src/sql/engine/cmd/ob_ddl_executor_util.cpp index 83ef0d7468..ad11e6f353 100644 --- a/src/sql/engine/cmd/ob_ddl_executor_util.cpp +++ b/src/sql/engine/cmd/ob_ddl_executor_util.cpp @@ -19,6 +19,7 @@ #include "share/ob_srv_rpc_proxy.h" //ObSrvRpcProxy #include "share/ob_ddl_error_message_table_operator.h" #include "sql/session/ob_sql_session_info.h" +#include "observer/ob_server_event_history_table_operator.h" namespace oceanbase { @@ -62,6 +63,14 @@ int ObDDLExecutorUtil::wait_ddl_finish( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(tenant_id), K(task_id), KP(common_rpc_proxy)); } else { + SERVER_EVENT_ADD("ddl", "start wait ddl finish", + "tenant_id", tenant_id, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "rpc_dest", common_rpc_proxy->get_server()); + LOG_INFO("start wait ddl finsih", K(task_id), "ddl_event_info", ObDDLEventInfo()); + int tmp_ret = 
OB_SUCCESS; bool is_tenant_dropped = false; bool is_tenant_standby = false; @@ -107,6 +116,14 @@ int ObDDLExecutorUtil::wait_ddl_finish( } } } + + SERVER_EVENT_ADD("ddl", "end wait ddl finish", + "tenant_id", tenant_id, + "ret", error_message.ret_code_, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "rpc_dest", common_rpc_proxy->get_server()); + LOG_INFO("finish wait ddl", K(ret), K(task_id), "ddl_event_info", ObDDLEventInfo(), K(error_message)); } return ret; } @@ -122,7 +139,14 @@ int ObDDLExecutorUtil::wait_build_index_finish(const uint64_t tenant_id, const i THIS_WORKER.set_timeout_ts(ObTimeUtility::current_time() + OB_MAX_USER_SPECIFIED_TIMEOUT); share::ObDDLErrorMessageTableOperator::ObBuildDDLErrorMessage error_message; is_finish = false; - LOG_INFO("wait build index finish", K(task_id)); + SERVER_EVENT_ADD("ddl", "start wait build index finish", + "tenant_id", tenant_id, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "is_tenant_standby", is_tenant_standby); + LOG_INFO("start wait build index finish", K(task_id), "ddl_event_info", ObDDLEventInfo()); + if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || task_id <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), K(tenant_id), K(task_id)); @@ -153,6 +177,14 @@ int ObDDLExecutorUtil::wait_build_index_finish(const uint64_t tenant_id, const i LOG_WARN("server is stopping, check whether the ddl task finish successfully or not", K(ret), K(tenant_id), K(task_id)); } } + + SERVER_EVENT_ADD("ddl", "end wait build index finish", + "tenant_id", tenant_id, + "ret", error_message.ret_code_, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "is_tenant_standby", is_tenant_standby); + LOG_INFO("finish wait build index", K(ret), "ddl_event_info", ObDDLEventInfo(), K(error_message)); return ret; } @@ -175,6 +207,14 @@ int ObDDLExecutorUtil::wait_ddl_retry_task_finish( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), 
K(tenant_id), K(task_id), KP(common_rpc_proxy)); } else { + SERVER_EVENT_ADD("ddl", "start wait ddl retry task finish", + "tenant_id", tenant_id, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "rpc_dest", common_rpc_proxy->get_server()); + LOG_INFO("start wait ddl retry task finish", K(task_id), "ddl_event_info", ObDDLEventInfo(), K(error_message)); + bool is_tenant_dropped = false; bool is_tenant_standby = false; int tmp_ret = OB_SUCCESS; @@ -254,6 +294,14 @@ int ObDDLExecutorUtil::wait_ddl_retry_task_finish( } } affected_rows = error_message.affected_rows_; + + SERVER_EVENT_ADD("ddl", "end wait ddl retry task finish", + "tenant_id", tenant_id, + "ret", error_message.ret_code_, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", task_id, + "rpc_dest", common_rpc_proxy->get_server()); + LOG_INFO("fnish wait ddl retry task", K(ret), K(task_id), "ddl_event_info", ObDDLEventInfo(), K(error_message)); } return ret; } @@ -273,9 +321,13 @@ int ObDDLExecutorUtil::cancel_ddl_task(const int64_t tenant_id, obrpc::ObCommonR } else { LOG_WARN("failed to cancel remote sys task", K(ret), K(rpc_arg), K(rs_leader_addr)); } - } else { - LOG_INFO("succeed to cancel sys task", K(rpc_arg), K(rs_leader_addr)); } + SERVER_EVENT_ADD("ddl", "finish cancel ddl task", + "tenant_id", tenant_id, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dest", rs_leader_addr); + LOG_INFO("finish cancel ddl task", K(ret), K(rpc_arg), K(rs_leader_addr), "ddl_event_info", ObDDLEventInfo()); return ret; } diff --git a/src/sql/engine/cmd/ob_index_executor.cpp b/src/sql/engine/cmd/ob_index_executor.cpp index 4404edcc63..e1e358197e 100644 --- a/src/sql/engine/cmd/ob_index_executor.cpp +++ b/src/sql/engine/cmd/ob_index_executor.cpp @@ -27,6 +27,7 @@ #include "sql/engine/cmd/ob_partition_executor_utils.h" #include "sql/resolver/ddl/ob_flashback_stmt.h" #include "observer/ob_server.h" +#include "observer/ob_server_event_history_table_operator.h" using 
namespace oceanbase::common; namespace oceanbase @@ -102,6 +103,14 @@ int ObCreateIndexExecutor::execute(ObExecContext &ctx, ObCreateIndexStmt &stmt) LOG_WARN("failed to wait ddl finish", K(ret)); } } + SERVER_EVENT_ADD("ddl", "create index execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", res.index_table_id_, + "schema_version", res.schema_version_); + SQL_ENG_LOG(INFO, "finish create index execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(create_index_arg)); return ret; } @@ -369,6 +378,14 @@ int ObDropIndexExecutor::execute(ObExecContext &ctx, ObDropIndexStmt &stmt) } else if (OB_FAIL(wait_drop_index_finish(res.tenant_id_, res.task_id_, *my_session))) { LOG_WARN("wait drop index finish failed", K(ret)); } + SERVER_EVENT_ADD("ddl", "drop index execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", res.index_table_id_, + "schema_version", res.schema_version_); + SQL_ENG_LOG(INFO, "finish drop index execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(drop_index_arg)); return ret; } @@ -395,6 +412,17 @@ int ObFlashBackIndexExecutor::execute(ObExecContext &ctx, ObFlashBackIndexStmt & } else if (OB_FAIL(common_rpc_proxy->flashback_index(flashback_index_arg))) { LOG_WARN("rpc proxy flashback index failed", "dst", common_rpc_proxy->get_server(), K(ret)); } + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "flashback index execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "origin_table_name", flashback_index_arg.origin_table_name_, + "new_table_name", flashback_index_arg.new_table_name_, + flashback_index_arg.new_db_name_); + } + SQL_ENG_LOG(INFO, "finish flashback index execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), 
K(flashback_index_arg)); return ret; } @@ -421,6 +449,17 @@ int ObPurgeIndexExecutor::execute(ObExecContext &ctx, ObPurgeIndexStmt &stmt) { } else if (OB_FAIL(common_rpc_proxy->purge_index(purge_index_arg))) { LOG_WARN("rpc proxy purge index failed", "dst", common_rpc_proxy->get_server(), K(ret)); } + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "purge index execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "table_id", purge_index_arg.table_id_, + "database_id", purge_index_arg.database_id_, + purge_index_arg.table_name_); + } + SQL_ENG_LOG(INFO, "finish purge database.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(purge_index_arg)); return ret; } diff --git a/src/sql/engine/cmd/ob_table_executor.cpp b/src/sql/engine/cmd/ob_table_executor.cpp index 71526789c1..0f429d5943 100644 --- a/src/sql/engine/cmd/ob_table_executor.cpp +++ b/src/sql/engine/cmd/ob_table_executor.cpp @@ -43,6 +43,7 @@ #include "sql/ob_select_stmt_printer.h" #include "observer/ob_server_struct.h" #include "observer/ob_server.h" +#include "observer/ob_server_event_history_table_operator.h" #include "lib/worker.h" #include "share/external_table/ob_external_table_file_mgr.h" #include "share/external_table/ob_external_table_file_task.h" @@ -483,6 +484,19 @@ int ObCreateTableExecutor::execute_ctas(ObExecContext &ctx, } else { LOG_DEBUG("table exists, no need to CTAS", K(create_table_res.table_id_)); } + if (OB_NOT_NULL(common_rpc_proxy)) { + char table_info_buffer[256]; + snprintf(table_info_buffer, sizeof(table_info_buffer), "table_id:%ld, hidden_table_id:%ld", + alter_table_arg.table_id_, alter_table_arg.hidden_table_id_); + SERVER_EVENT_ADD("ddl", "create table as select execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "table_info", table_info_buffer, + "schema_version", 
create_table_res.schema_version_); + } + SQL_ENG_LOG(INFO, "finish create table execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(create_table_arg), K(alter_table_arg)); } OZ(my_session->store_query_string(cur_query)); } @@ -597,6 +611,16 @@ int ObCreateTableExecutor::execute(ObExecContext &ctx, ObCreateTableStmt &stmt) LOG_WARN("execute create table as select failed", KR(ret)); } } + if (OB_NOT_NULL(common_rpc_proxy)) { + SERVER_EVENT_ADD("ddl", "create table execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "rpc_dst", common_rpc_proxy->get_server(), + "table_info", res.table_id_, + "schema_version", res.schema_version_); + } + SQL_ENG_LOG(INFO, "finish create table execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(create_table_arg)); // only CTAS or create temporary table will make session_id != 0. If such table detected, set // need ctas cleanup task anyway to do some cleanup jobs @@ -1284,6 +1308,19 @@ int ObAlterTableExecutor::execute(ObExecContext &ctx, ObAlterTableStmt &stmt) } } } + char table_info_buffer[256]; + snprintf(table_info_buffer, sizeof(table_info_buffer), "table_id:%ld, hidden_table_id:%ld", + alter_table_arg.table_id_, alter_table_arg.hidden_table_id_); + + SERVER_EVENT_ADD("ddl", "alter table execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_info", table_info_buffer, + "schema_version", res.schema_version_, + alter_table_arg.inner_sql_exec_addr_); + SQL_ENG_LOG(INFO, "finish alter table execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(alter_table_arg), K(first_stmt)); } return ret; } @@ -2056,6 +2093,13 @@ int ObDropTableExecutor::execute(ObExecContext &ctx, ObDropTableStmt &stmt) //do nothing } } + SERVER_EVENT_ADD("ddl", "drop table execute finish", + "tenant_id", res.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", 
res.task_id_, + "schema_id", res.schema_id_); + SQL_ENG_LOG(INFO, "finish drop table execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(drop_table_arg)); return ret; } @@ -2248,6 +2292,14 @@ int ObTruncateTableExecutor::execute(ObExecContext &ctx, ObTruncateTableStmt &st K(query_timeout), K(THIS_WORKER.get_timeout_remain())); } } + SERVER_EVENT_ADD("ddl", "truncate table execute finish", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_info", truncate_table_arg.table_name_, + "schema_id", res.schema_id_); + SQL_ENG_LOG(INFO, "finish truncate table execute.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(stmt), K(truncate_table_arg)); } return ret; } diff --git a/src/sql/engine/px/ob_px_sub_coord.cpp b/src/sql/engine/px/ob_px_sub_coord.cpp index 0311e88e3e..2321b7c845 100644 --- a/src/sql/engine/px/ob_px_sub_coord.cpp +++ b/src/sql/engine/px/ob_px_sub_coord.cpp @@ -340,7 +340,9 @@ int ObPxSubCoord::setup_op_input(ObExecContext &ctx, LOG_WARN("start ddl failed", K(ret)); } #ifdef ERRSIM - ret = OB_E(EventTable::EN_DDL_START_FAIL) OB_SUCCESS; + if (OB_SUCC(ret)) { + ret = OB_E(EventTable::EN_DDL_START_FAIL) OB_SUCCESS; + } #endif } } else if (IS_PX_GI(root.get_type())) { diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index 66f87d49a9..d62d39ba51 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -273,6 +273,9 @@ int ObTenantTabletScheduler::init() schedule_batch_size = tenant_config->compaction_schedule_tablet_batch_cnt; } } // end of ObTenantConfigGuard +#ifdef ERRSIM + schedule_interval = 1000L * 1000L; // 1s +#endif if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("ObTenantTabletScheduler has inited", K(ret)); diff --git a/src/storage/ddl/ob_build_index_task.cpp b/src/storage/ddl/ob_build_index_task.cpp index aea38a9d54..fe34ff379b 
100644 --- a/src/storage/ddl/ob_build_index_task.cpp +++ b/src/storage/ddl/ob_build_index_task.cpp @@ -19,12 +19,14 @@ #include "share/ob_get_compat_mode.h" #include "share/ob_ddl_task_executor.h" #include "share/schema/ob_tenant_schema_service.h" +#include "share/ob_ddl_sim_point.h" #include "share/scheduler/ob_dag_warning_history_mgr.h" #include "storage/compaction/ob_column_checksum_calculator.h" #include "storage/ddl/ob_ddl_redo_log_writer.h" #include "storage/ddl/ob_complement_data_task.h" #include "storage/ob_i_table.h" #include "observer/ob_server_struct.h" +#include "observer/ob_server_event_history_table_operator.h" #include "storage/blocksstable/ob_datum_row.h" #include "storage/ob_sstable_struct.h" #include "storage/tx_storage/ob_ls_service.h" @@ -147,6 +149,8 @@ int ObUniqueIndexChecker::scan_table_with_column_checksum( if (OB_UNLIKELY(!param.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(param)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UNIQUE_INDEX_CHECKER_SCAN_TABLE_WITH_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { transaction::ObTransService *trans_service = nullptr; ObTabletTableIterator iterator; @@ -224,6 +228,8 @@ int ObUniqueIndexChecker::generate_index_output_param( if (OB_UNLIKELY(!data_table_schema.is_valid() || !index_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(data_table_schema), K(index_schema)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UNIQUE_INDEX_CHECKER_GENERATE_INDEX_OUTPUT_PARAM_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else { // add data table rowkey const ObRowkeyInfo &rowkey_info = data_table_schema.get_rowkey_info(); @@ -549,8 +555,8 @@ int ObUniqueIndexChecker::check_unique_index(ObIDag *dag) bool keep_report_err_msg = true; LOG_INFO("begin to report build index status & ddl error message", 
K(index_schema_->get_table_id()), K(*index_schema_), K(tablet_id_)); while (!dag->has_set_stop() && keep_report_err_msg) { - int64_t task_id = 0; - if (OB_SUCCESS != (tmp_ret = ObDDLErrorMessageTableOperator::get_index_task_id(*GCTX.sql_proxy_, *index_schema_, task_id))) { + ObDDLErrorMessageTableOperator::ObDDLErrorInfo info; + if (OB_SUCCESS != (tmp_ret = ObDDLErrorMessageTableOperator::get_index_task_info(*GCTX.sql_proxy_, *index_schema_, info))) { if (OB_ITER_END == tmp_ret) { keep_report_err_msg = false; LOG_INFO("get task id failed, check whether index building task is cancled", K(ret), K(tmp_ret), KPC(index_schema_)); @@ -558,7 +564,7 @@ int ObUniqueIndexChecker::check_unique_index(ObIDag *dag) LOG_INFO("get task id failed, but retry to get it", K(ret), K(tmp_ret), KPC(index_schema_)); } } else if (OB_SUCCESS != (tmp_ret = ObDDLErrorMessageTableOperator::generate_index_ddl_error_message( - ret, *index_schema_, task_id, tablet_id_.id(), self_addr, *GCTX.sql_proxy_, "\0", report_ret_code))) { + ret, *index_schema_, info.trace_id_str_, info.task_id_, info.parent_task_id_, tablet_id_.id(), self_addr, *GCTX.sql_proxy_, "\0", report_ret_code))) { LOG_WARN("fail to generate index ddl error message", K(ret), K(tmp_ret), KPC(index_schema_), K(tablet_id_), K(self_addr)); ob_usleep(RETRY_INTERVAL); if (OB_FAIL(dag_yield())) { @@ -595,6 +601,8 @@ int ObUniqueIndexChecker::wait_trans_end(ObIDag *dag) if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObUniqueIndexChecker has not been inited", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id_, task_id_, UNIQUE_INDEX_CHECKER_WAIT_TRANS_END_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id_), K(task_id_)); } else if (OB_FAIL(ls_service->get_ls(ObLSID(ls_id_), ls_handle, ObLSGetMod::DDL_MOD))) { LOG_WARN("get ls failed", K(ret), K(ls_id_)); } else { @@ -983,6 +991,17 @@ int ObSimpleUniqueCheckingTask::process() STORAGE_LOG(WARN, "fail to check unique index response", K(ret)); } } + if (OB_NOT_NULL(dag)) 
{ + SERVER_EVENT_ADD("ddl", "simple unique check task process", + "tenant_id", tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", dag->get_task_id(), + "snapshot_version", dag->get_snapshot_version(), + "tablet_id", tablet_id_, + dag->get_ls_id()); + } + LOG_INFO("simple unique check task process.", K(ret), "ddl_event_info", ObDDLEventInfo(), KPC(dag)); return ret; } diff --git a/src/storage/ddl/ob_complement_data_task.cpp b/src/storage/ddl/ob_complement_data_task.cpp index 0ab83ac973..3c8e9d50d2 100644 --- a/src/storage/ddl/ob_complement_data_task.cpp +++ b/src/storage/ddl/ob_complement_data_task.cpp @@ -22,6 +22,7 @@ #include "share/ob_freeze_info_proxy.h" #include "share/ob_get_compat_mode.h" #include "share/schema/ob_table_dml_param.h" +#include "share/ob_ddl_sim_point.h" #include "share/schema/ob_part_mgr_util.h" #include "sql/engine/px/ob_granule_util.h" #include "sql/ob_sql_utils.h" @@ -40,6 +41,7 @@ #include "storage/lob/ob_lob_util.h" #include "logservice/ob_log_service.h" #include "storage/ddl/ob_tablet_ddl_kv_mgr.h" +#include "observer/ob_server_event_history_table_operator.h" namespace oceanbase { @@ -57,6 +59,25 @@ using namespace blocksstable; namespace storage { +void add_ddl_event(const ObComplementDataParam *param, const ObString &stmt) +{ + if (OB_NOT_NULL(param)) { + char table_id_buffer[256]; + char tablet_id_buffer[256]; + snprintf(table_id_buffer, sizeof(table_id_buffer), "source_table_id:%ld, dest_table_id:%ld", param->orig_table_id_, param->dest_table_id_); + snprintf(tablet_id_buffer, sizeof(tablet_id_buffer), "source_id:%lu, dest_id:%lu", param->orig_tablet_id_.id(), param->dest_tablet_id_.id()); + + SERVER_EVENT_ADD("ddl", stmt.ptr(), + "tenant_id", param->dest_tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", param->task_id_, + "table_id", table_id_buffer, + "schema_version", param->dest_schema_version_, + tablet_id_buffer); + } + LOG_INFO("complement data task.", K(ret), 
"ddl_event_info", ObDDLEventInfo(), K(stmt), KPC(param)); +} int ObComplementDataParam::init(const ObDDLBuildSingleReplicaRequestArg &arg) { @@ -193,6 +214,8 @@ int ObComplementDataParam::split_task_ranges( } else if (OB_ISNULL(tablet_service = ls_handle.get_ls()->get_tablet_svr())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet service is nullptr", K(ret)); + } else if (OB_FAIL(DDL_SIM(MTL_ID(), task_id_, COMPLEMENT_DATA_TASK_SPLIT_RANGE_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(task_id_)); } else { int64_t total_size = 0; int64_t expected_task_count = 0; @@ -329,7 +352,7 @@ int ObComplementDataContext::write_start_log(const ObComplementDataParam ¶m) } else if (OB_UNLIKELY(!hidden_table_key.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid table key", K(ret), K(hidden_table_key)); - } else if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(hidden_table_key, + } else if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(hidden_table_key, param.task_id_, param.execution_id_, param.data_format_version_, ddl_kv_mgr_handle_))) { LOG_WARN("fail write start log", K(ret), K(hidden_table_key), K(param)); } else { @@ -675,8 +698,6 @@ int ObComplementPrepareTask::process() int ret = OB_SUCCESS; ObIDag *tmp_dag = get_dag(); ObComplementDataDag *dag = nullptr; - ObComplementWriteTask *write_task = nullptr; - ObComplementMergeTask *merge_task = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("ObComplementPrepareTask has not been inited", K(ret)); @@ -699,13 +720,15 @@ int ObComplementPrepareTask::process() param_->tablet_task_id_))) { LOG_WARN("failed to delete checksum", K(ret), KPC(param_)); } else { - LOG_INFO("finish the complement prepare task", K(ret), KPC(param_)); + LOG_INFO("finish the complement prepare task", K(ret), KPC(param_), "ddl_event_info", ObDDLEventInfo()); } if (OB_FAIL(ret)) { context_->complement_data_ret_ = ret; ret = OB_SUCCESS; } + + add_ddl_event(param_, "complement prepare task"); return ret; } @@ 
-773,6 +796,9 @@ int ObComplementWriteTask::process() } else if (param_->dest_tenant_id_ == param_->orig_tenant_id_) { if (OB_FAIL(local_scan_by_range())) { LOG_WARN("local scan and append row for column redefinition failed", K(ret), K(task_id_)); + } else { + ObDDLEventInfo event_info; + LOG_INFO("finish the complement write task", K(ret), "ddl_event_info", ObDDLEventInfo()); } } else if (OB_FAIL(remote_scan())) { LOG_WARN("remote scan for recover restore table ddl failed", K(ret)); @@ -781,6 +807,8 @@ int ObComplementWriteTask::process() context_->complement_data_ret_ = ret; ret = OB_SUCCESS; } + + add_ddl_event(param_, "complement write task"); return ret; } @@ -965,6 +993,8 @@ int ObComplementWriteTask::do_local_scan() } else if (OB_UNLIKELY(nullptr == ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls is null", K(ret), K(ls_handle)); + } else if (OB_FAIL(DDL_SIM(tenant_id, param_->task_id_, COMPLEMENT_DATA_TASK_LOCAL_SCAN_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), KPC(param_)); } else if (OB_FAIL(ls_handle.get_ls()->get_tablet_svr()->get_read_tables(param_->orig_tablet_id_, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US, param_->snapshot_version_, iterator, allow_not_ready))) { @@ -1214,6 +1244,8 @@ int ObComplementWriteTask::append_row(ObScan *scan) t1 = ObTimeUtility::current_time(); if (OB_FAIL(dag_yield())) { LOG_WARN("fail to yield dag", KR(ret)); + } else if (OB_FAIL(DDL_SIM(param_->dest_tenant_id_, param_->task_id_, DDL_INSERT_SSTABLE_GET_NEXT_ROW_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), KPC(param_)); } else if (OB_FAIL(scan->get_next_row(tmp_row, reshape_row_only_for_remote_scan))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("fail to get next row", K(ret)); @@ -1428,6 +1460,8 @@ int ObComplementMergeTask::process() ret = OB_SUCCESS == ret ? 
tmp_ret : ret; LOG_WARN("fail to report replica build status", K(ret), K(tmp_ret)); } + + add_ddl_event(param_, "complement merge task"); return ret; } diff --git a/src/storage/ddl/ob_complement_data_task.h b/src/storage/ddl/ob_complement_data_task.h index 2e387c5f7f..e4e9a825df 100644 --- a/src/storage/ddl/ob_complement_data_task.h +++ b/src/storage/ddl/ob_complement_data_task.h @@ -113,6 +113,8 @@ public: ObSEArray ranges_; }; +void add_ddl_event(const ObComplementDataParam *param, const ObString &stmt); + struct ObComplementDataContext final { public: diff --git a/src/storage/ddl/ob_ddl_merge_task.cpp b/src/storage/ddl/ob_ddl_merge_task.cpp index 4125dd5e94..f64af24a8e 100644 --- a/src/storage/ddl/ob_ddl_merge_task.cpp +++ b/src/storage/ddl/ob_ddl_merge_task.cpp @@ -30,6 +30,8 @@ #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx_storage/ob_ls_handle.h" #include "share/schema/ob_multi_version_schema_service.h" +#include "share/ob_ddl_sim_point.h" +#include "observer/ob_server_event_history_table_operator.h" #include "storage/column_store/ob_column_oriented_sstable.h" using namespace oceanbase::observer; @@ -209,7 +211,7 @@ int ObDDLTableDumpTask::init(const share::ObLSID &ls_id, int ObDDLTableDumpTask::process() { int ret = OB_SUCCESS; - LOG_INFO("ddl dump task start process", K(*this)); + LOG_INFO("ddl dump task start process", K(*this), "ddl_event_info", ObDDLEventInfo()); ObTabletHandle tablet_handle; ObDDLKvMgrHandle ddl_kv_mgr_handle; ObLSHandle ls_handle; @@ -252,6 +254,14 @@ int ObDDLTableDumpTask::process() LOG_WARN("release ddl kv failed", K(ret), K(freeze_scn_)); } } + SERVER_EVENT_ADD("ddl", "ddl table dump task", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "tablet_id", tablet_id_, + "freeze_scn", freeze_scn_, + "ls_id", ls_id_); + LOG_INFO("ddl dump task finish process", K(ret), K(*this), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -289,10 +299,12 @@ int ObDDLTableMergeTask::process() 
#ifdef ERRSIM if (0 != GCONF.errsim_max_ddl_sstable_count) { MAX_DDL_SSTABLE = GCONF.errsim_max_ddl_sstable_count; - LOG_INFO("set max ddl sstable in errsim mode", K(MAX_DDL_SSTABLE)); + } else { + MAX_DDL_SSTABLE = 2; } + LOG_INFO("set max ddl sstable in errsim mode", K(MAX_DDL_SSTABLE)); #endif - LOG_INFO("ddl merge task start process", K(*this)); + LOG_INFO("ddl merge task start process", K(*this), "ddl_event_info", ObDDLEventInfo()); ObTabletHandle tablet_handle; ObDDLKvMgrHandle ddl_kv_mgr_handle; ObLSHandle ls_handle; @@ -345,6 +357,7 @@ int ObDDLTableMergeTask::process() } else { sstable = static_cast( table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)); + skip_major_process = true; } } else if (tablet_handle.get_obj()->get_tablet_meta().table_store_flag_.with_major_sstable()) { skip_major_process = true; @@ -357,6 +370,16 @@ int ObDDLTableMergeTask::process() } else if (merge_param_.start_scn_ > SCN::min_scn() && merge_param_.start_scn_ < ddl_param.start_scn_) { ret = OB_TASK_EXPIRED; LOG_INFO("ddl merge task expired, do nothing", K(merge_param_), "new_start_scn", ddl_param.start_scn_); +#ifdef ERRSIM + } else { + const SCN commit_scn = ddl_kv_mgr_handle.get_obj()->get_commit_scn(tablet_handle.get_obj()->get_tablet_meta()); + skip_major_process = commit_scn.is_valid_and_not_min() + && ObTimeUtility::current_time() - commit_scn.convert_to_ts() <= GCONF.errsim_ddl_major_delay_time; +#endif + } + if (OB_FAIL(ret)) { + } else if (skip_major_process) { + // do nothing } else if (OB_FAIL(ObTabletDDLUtil::compact_ddl_sstable(*tablet_handle.get_obj(), ddl_table_iter, tablet_handle.get_obj()->get_rowkey_read_info(), @@ -387,12 +410,14 @@ int ObDDLTableMergeTask::process() LOG_WARN("fail to submit tablet update task", K(ret), K(tenant_id), K(merge_param_)); } if (OB_FAIL(ret)) { + } else if (OB_ISNULL(sstable)) { + // not set success } else if (OB_FAIL(ddl_kv_mgr_handle.get_obj()->set_commit_success(merge_param_.start_scn_))) { 
if (OB_EAGAIN != ret) { LOG_WARN("set is commit success failed", K(ret)); } } else { - LOG_INFO("commit ddl sstable succ", K(ddl_param), K(merge_param_)); + LOG_INFO("commit ddl sstable succ", K(ddl_param), K(merge_param_), "ddl_event_info", ObDDLEventInfo()); } } } @@ -948,6 +973,8 @@ int ObTabletDDLUtil::report_ddl_checksum(const share::ObLSID &ls_id, ret = OB_TABLE_NOT_EXIST; LOG_INFO("table not exit", K(ret), K(tenant_id), K(table_id)); ret = OB_TASK_EXPIRED; // for ignore warning + } else if (OB_FAIL(DDL_SIM(tenant_id, ddl_task_id, REPORT_DDL_CHECKSUM_FAILED))) { + LOG_WARN("ddl sim failure", K(tenant_id), K(ddl_task_id)); } else { ObArray column_ids; ObArray ddl_checksum_items; diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.cpp b/src/storage/ddl/ob_ddl_redo_log_writer.cpp index f395b29082..ec63c01cfb 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.cpp +++ b/src/storage/ddl/ob_ddl_redo_log_writer.cpp @@ -29,6 +29,7 @@ #include "observer/ob_server_event_history_table_operator.h" #include "storage/tablet/ob_tablet.h" #include "rootserver/ddl_task/ob_ddl_task.h" +#include "share/ob_ddl_sim_point.h" using namespace oceanbase::common; using namespace oceanbase::storage; @@ -187,6 +188,8 @@ int ObDDLCtrlSpeedItem::do_sleep( } else if (next_available_ts <= 0 || OB_INVALID_TENANT_ID == tenant_id || task_id == 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument.", K(ret), K(next_available_ts), K(tenant_id), K(task_id)); + } else if (OB_FAIL(DDL_SIM(MTL_ID(), task_id, DDL_REDO_WRITER_SPEED_CONTROL_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(task_id)); } else if (OB_TMP_FAIL(check_need_stop_write(ddl_kv_mgr_handle, is_need_stop_write))) { LOG_WARN("fail to check need stop write", K(tmp_ret), K(ddl_kv_mgr_handle)); } @@ -394,6 +397,8 @@ int ObDDLCtrlSpeedHandle::limit_and_sleep(const uint64_t tenant_id, } else if (OB_UNLIKELY(!speed_handle_map_.created())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("speed handle map is not created", K(ret)); 
+ } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, WRITE_DUPLICATED_DDL_REDO_LOG))) { + LOG_WARN("ddl sim remote write", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(add_ctrl_speed_item(speed_handle_key, item_handle))) { LOG_WARN("add speed item failed", K(ret)); } else if (OB_FAIL(item_handle.get_ctrl_speed_item(speed_handle_item))) { @@ -721,6 +726,8 @@ int ObDDLRedoLogWriter::write( /* use the ObString data_buffer_ in tmp_log.redo_info_, do not rely on the macro_block_buf in original log*/ } else if (OB_FAIL(cb->init(ls_id, tmp_log.get_redo_info(), macro_block_id, tablet_handle, ddl_kv_mgr_handle))) { LOG_WARN("init ddl clog callback failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, DDL_REDO_WRITER_WRITE_MACRO_LOG_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(log_handler->append(buffer, buffer_size, base_scn, @@ -1155,6 +1162,7 @@ int ObDDLSSTableRedoWriter::init(const ObLSID &ls_id, const ObTabletID &tablet_i } int ObDDLSSTableRedoWriter::start_ddl_redo(const ObITable::TableKey &table_key, + const int64_t ddl_task_id, const int64_t execution_id, const int64_t data_format_version, ObDDLKvMgrHandle &ddl_kv_mgr_handle) @@ -1183,6 +1191,8 @@ int ObDDLSSTableRedoWriter::start_ddl_redo(const ObITable::TableKey &table_key, LOG_WARN("get tablet handle failed", K(ret), K(ls_id_), K(tablet_id_)); } else if (OB_FAIL(tablet_handle.get_obj()->get_ddl_kv_mgr(ddl_kv_mgr_handle, true/*try_create*/))) { LOG_WARN("create ddl kv mgr failed", K(ret)); + } else if (OB_FAIL(DDL_SIM(MTL_ID(), ddl_task_id, DDL_REDO_WRITER_WRITE_START_LOG_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(ddl_task_id)); } else if (OB_FAIL(ObDDLRedoLogWriter::get_instance().write_ddl_start_log(ls_handle, tablet_handle, ddl_kv_mgr_handle, log, ls->get_log_handler(), tmp_scn))) { LOG_WARN("fail to write ddl start log", K(ret), K(table_key)); } else if (FALSE_IT(set_start_scn(tmp_scn))) { @@ -1191,6 +1201,14 @@ int 
ObDDLSSTableRedoWriter::start_ddl_redo(const ObITable::TableKey &table_key, } else { ddl_kv_mgr_handle.get_obj()->reset_commit_success(); // releated issue: } + SERVER_EVENT_ADD("ddl", "ddl write start log", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", ddl_task_id, + "tablet_id", tablet_id_, + "start_scn", get_start_scn()); + LOG_INFO("ddl write start log", K(ret), "ddl_event_info", ObDDLEventInfo(), K(ddl_task_id)); return ret; } @@ -1271,6 +1289,10 @@ int ObDDLSSTableRedoWriter::end_ddl_redo_and_create_ddl_sstable( ret = OB_ERR_SYS; LOG_WARN("tablet handle is null", K(ret), K(ls_id), K(tablet_id)); } else { + bool need_report_ddl_checksum = true; +#ifdef ERRSIM + need_report_ddl_checksum = 0 == GCONF.errsim_ddl_major_delay_time; +#endif ObTabletMemberWrapper table_store_wrapper; ObSSTableMetaHandle sst_meta_hdl; const ObSSTable *first_major_sstable = nullptr; @@ -1278,6 +1300,8 @@ int ObDDLSSTableRedoWriter::end_ddl_redo_and_create_ddl_sstable( LOG_WARN("fail to fetch table store", K(ret)); } else if (OB_FALSE_IT(first_major_sstable = static_cast( table_store_wrapper.get_member()->get_major_sstables().get_boundary_table(false/*first*/)))) { + } else if (!need_report_ddl_checksum) { + // skip } else if (OB_ISNULL(first_major_sstable)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("no major after wait merge success", K(ret), K(ls_id), K(tablet_id)); @@ -1431,6 +1455,15 @@ int ObDDLSSTableRedoWriter::write_commit_log(ObTabletHandle &tablet_handle, is_remote_write = !(leader_addr_ == GCTX.self_addr()); } } + SERVER_EVENT_ADD("ddl", "ddl write commit log", + "tenant_id", MTL_ID(), + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "start_scn", start_scn_, + "tablet_id", tablet_id_, + "commit_scn", commit_scn, + is_remote_write); + LOG_INFO("ddl write commit log", K(ret), "ddl_event_info", ObDDLEventInfo()); return ret; } diff --git a/src/storage/ddl/ob_ddl_redo_log_writer.h b/src/storage/ddl/ob_ddl_redo_log_writer.h 
index cc16dfdc08..5388578337 100644 --- a/src/storage/ddl/ob_ddl_redo_log_writer.h +++ b/src/storage/ddl/ob_ddl_redo_log_writer.h @@ -128,7 +128,11 @@ private: int init(int tg_id); virtual void runTimerTask() override; private: +#ifdef ERRSIM + const static int64_t REFRESH_INTERVAL = 100 * 1000; // 100ms +#else const static int64_t REFRESH_INTERVAL = 1 * 1000 * 1000; // 1s +#endif bool is_inited_; DISABLE_COPY_ASSIGN(RefreshSpeedHandleTask); }; @@ -286,6 +290,7 @@ public: ~ObDDLSSTableRedoWriter(); int init(const share::ObLSID &ls_id, const ObTabletID &tablet_id); int start_ddl_redo(const ObITable::TableKey &table_key, + const int64_t ddl_task_id, const int64_t execution_id, const int64_t data_format_version, ObDDLKvMgrHandle &ddl_kv_mgr_handle); diff --git a/src/storage/ddl/ob_ddl_replay_executor.cpp b/src/storage/ddl/ob_ddl_replay_executor.cpp index ac0089df11..712124346f 100644 --- a/src/storage/ddl/ob_ddl_replay_executor.cpp +++ b/src/storage/ddl/ob_ddl_replay_executor.cpp @@ -143,7 +143,7 @@ int ObDDLStartReplayExecutor::do_replay_(ObTabletHandle &handle) } else { LOG_INFO("succeed to replay ddl start log", K(ret), KPC_(log), K_(scn)); } - LOG_INFO("finish replay ddl start log", K(ret), K(need_replay), KPC_(log), K_(scn)); + LOG_INFO("finish replay ddl start log", K(ret), K(need_replay), KPC_(log), K_(scn), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -224,7 +224,7 @@ int ObDDLRedoReplayExecutor::do_replay_(ObTabletHandle &handle) } } } - LOG_INFO("finish replay ddl redo log", K(ret), K(need_replay), KPC_(log)); + LOG_INFO("finish replay ddl redo log", K(ret), K(need_replay), KPC_(log), "ddl_event_info", ObDDLEventInfo()); return ret; } @@ -285,7 +285,7 @@ int ObDDLCommitReplayExecutor::do_replay_(ObTabletHandle &handle) //TODO(jianyun } else { LOG_INFO("replay ddl commit log success", K(ret), KPC_(log), K_(scn)); } - LOG_INFO("finish replay ddl commit log", K(ret), K(need_replay), K_(scn), KPC_(log)); + LOG_INFO("finish replay ddl commit log", 
K(ret), K(need_replay), K_(scn), KPC_(log), "ddl_event_info", ObDDLEventInfo()); return ret; } diff --git a/src/storage/ddl/ob_ddl_server_client.cpp b/src/storage/ddl/ob_ddl_server_client.cpp index 98433ea2b0..712a794271 100644 --- a/src/storage/ddl/ob_ddl_server_client.cpp +++ b/src/storage/ddl/ob_ddl_server_client.cpp @@ -16,12 +16,14 @@ #include "observer/ob_server_struct.h" #include "share/ob_common_rpc_proxy.h" #include "share/ob_ddl_common.h" +#include "share/ob_ddl_sim_point.h" #include "storage/ddl/ob_ddl_heart_beat_task.h" #include "lib/ob_define.h" #include "lib/mysqlclient/ob_isql_client.h" #include "sql/engine/cmd/ob_ddl_executor_util.h" #include "rootserver/ddl_task/ob_table_redefinition_task.h" #include "observer/omt/ob_multi_tenant.h" +#include "observer/ob_server_event_history_table_operator.h" namespace oceanbase { @@ -108,6 +110,17 @@ int ObDDLServerClient::create_hidden_table( // abort_redef_table() function last step must remove heart_beat task, so there is no need to call heart_beat_clear() } } + char tenant_id_buffer[256]; + snprintf(tenant_id_buffer, sizeof(tenant_id_buffer), "tenant_id:%ld, dest_tenant_id:%ld", + arg.tenant_id_, arg.dest_tenant_id_); + SERVER_EVENT_ADD("ddl", "create hidden table", + "tenant_id", tenant_id_buffer, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", res.table_id_, + "schema_version", res.schema_version_); + LOG_INFO("finish create hidden table.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(arg), K(res)); return ret; } @@ -140,6 +153,22 @@ int ObDDLServerClient::start_redef_table(const obrpc::ObStartRedefTableArg &arg, } // abort_redef_table() function last step must remove heart_beat task, so there is no need to call heart_beat_clear() } + char tenant_id_buffer[256]; + snprintf(tenant_id_buffer, sizeof(tenant_id_buffer), "orig_tenant_id:%ld, target_tenant_id:%ld", + arg.orig_tenant_id_, arg.target_tenant_id_); + char table_id_buffer[256]; + 
snprintf(table_id_buffer, sizeof(table_id_buffer), "orig_table_id:%ld, target_table_id:%ld", + arg.orig_table_id_, arg.target_table_id_); + + SERVER_EVENT_ADD("ddl", "start redef table", + "tenant_id", tenant_id_buffer, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", res.task_id_, + "table_id", table_id_buffer, + "schema_version", res.schema_version_); + LOG_INFO("start redef table.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(arg), K(res)); + return ret; } @@ -189,6 +218,14 @@ int ObDDLServerClient::copy_table_dependents( } } } + + SERVER_EVENT_ADD("ddl", "copy table dependents", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.task_id_, + "rpc_dst", rs_leader_addr); + LOG_INFO("finish copy table dependents.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(arg), K(rs_leader_addr)); return ret; } @@ -247,6 +284,14 @@ int ObDDLServerClient::abort_redef_table(const obrpc::ObAbortRedefTableArg &arg, LOG_WARN("heart beat clear failed", K(tmp_ret), K(arg.task_id_)); } } + + SERVER_EVENT_ADD("ddl", "abort redef table", + "tenant_id", arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", arg.task_id_, + "rpc_dst", rs_leader_addr); + LOG_INFO("abort redef table.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(arg), K(rs_leader_addr)); return ret; } @@ -306,6 +351,16 @@ int ObDDLServerClient::finish_redef_table(const obrpc::ObFinishRedefTableArg &fi LOG_WARN("heart beat clear failed", K(tmp_ret), K(finish_redef_arg.task_id_)); } } + + SERVER_EVENT_ADD("ddl", "finish redef table", + "tenant_id", finish_redef_arg.tenant_id_, + "ret", ret, + "trace_id", *ObCurTraceId::get_trace_id(), + "task_id", finish_redef_arg.task_id_, + "snapshot_version", build_single_arg.snapshot_version_, + "rpc_dst", rs_leader_addr, + build_single_arg.ls_id_); + LOG_INFO("finish redef table.", K(ret), "ddl_event_info", ObDDLEventInfo(), K(finish_redef_arg), K(build_single_arg), 
K(rs_leader_addr)); return ret; } @@ -339,10 +394,14 @@ int ObDDLServerClient::wait_task_reach_pending(const uint64_t tenant_id, const i if (OB_UNLIKELY(task_id <= 0 || OB_INVALID_ID == tenant_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(task_id), K(tenant_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, WAIT_REDEF_TASK_REACH_PENDING_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else { while (OB_SUCC(ret)) { if (OB_FAIL(sql_string.assign_fmt("SELECT status, snapshot_version FROM %s WHERE task_id = %lu", share::OB_ALL_DDL_TASK_STATUS_TNAME, task_id))) { LOG_WARN("assign sql string failed", K(ret), K(task_id)); + } else if (OB_FAIL(DDL_SIM(tenant_id, task_id, WAIT_REDEF_TASK_REACH_PENDING_SLOW))) { + LOG_WARN("ddl sim failure", K(ret), K(tenant_id), K(task_id)); } else if (OB_FAIL(sql_proxy.read(res, tenant_id, sql_string.ptr()))) { LOG_WARN("fail to execute sql", K(ret), K(sql_string)); } else if (OB_ISNULL(result = res.get_result())) { diff --git a/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp b/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp index 1c55ef7ba8..acd5d79ac6 100644 --- a/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp +++ b/src/storage/ddl/ob_direct_insert_sstable_ctx.cpp @@ -17,6 +17,7 @@ #include "share/ob_ddl_error_message_table_operator.h" #include "share/ob_ddl_common.h" #include "share/ob_tablet_autoincrement_service.h" +#include "share/ob_ddl_sim_point.h" #include "storage/ddl/ob_ddl_merge_task.h" #include "storage/blocksstable/index_block/ob_index_block_builder.h" #include "storage/compaction/ob_column_checksum_calculator.h" @@ -488,7 +489,7 @@ int ObSSTableInsertTabletContext::update(const int64_t snapshot_version) LOG_WARN("invalid argument", K(ret), K(table_key)); } else if (data_sstable_redo_writer_.get_start_scn().is_valid_and_not_min()) { // ddl start log is already written, do nothing - } else if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(table_key, + } else 
if (OB_FAIL(data_sstable_redo_writer_.start_ddl_redo(table_key, build_param_.ddl_task_id_, build_param_.execution_id_, build_param_.data_format_version_, ddl_kv_mgr_handle_))) { LOG_WARN("fail write start log", K(ret), K(table_key), K(build_param_)); } @@ -543,6 +544,8 @@ int ObSSTableInsertTabletContext::build_sstable_slice( ret = OB_SUCCESS; break; } + } else if (OB_FAIL(DDL_SIM(MTL_ID(), build_param_.ddl_task_id_, DDL_INSERT_SSTABLE_GET_NEXT_ROW_FAILED))) { + LOG_WARN("ddl sim failure", K(ret), K(MTL_ID()), K(build_param_)); } else if (tablet_id != row_tablet_id) { ret = OB_SUCCESS; break; @@ -554,15 +557,15 @@ int ObSSTableInsertTabletContext::build_sstable_slice( "", static_cast(sizeof("UNIQUE IDX") - 1), "UNIQUE IDX"); char index_key_buffer[OB_TMP_BUF_SIZE_256]; ObStoreRowkey index_key; - int64_t task_id = 0; + ObDDLErrorMessageTableOperator::ObDDLErrorInfo info; index_key.assign(row_val->cells_, rowkey_column_num); if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::extract_index_key(*table_schema, index_key, index_key_buffer, OB_TMP_BUF_SIZE_256))) { // read the unique key that violates the unique constraint LOG_WARN("extract unique index key failed", K(tmp_ret), K(index_key), K(index_key_buffer)); // TODO(shuangcan): check if we need to change part_id to tablet_id - } else if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::get_index_task_id(*GCTX.sql_proxy_, *table_schema, task_id))) { - LOG_WARN("get task id of index table failed", K(tmp_ret), K(task_id), KPC(table_schema)); - } else if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(ret, *table_schema, - task_id, row_tablet_id.id(), GCTX.self_addr(), *GCTX.sql_proxy_, index_key_buffer, report_ret_code))) { + } else if (OB_TMP_FAIL(ObDDLErrorMessageTableOperator::get_index_task_info(*GCTX.sql_proxy_, *table_schema, info))) { + LOG_WARN("get task id of index table failed", K(tmp_ret), K(info), KPC(table_schema)); + } else if 
(OB_TMP_FAIL(ObDDLErrorMessageTableOperator::generate_index_ddl_error_message(ret, *table_schema, info.trace_id_str_, + info.task_id_, info.parent_task_id_, row_tablet_id.id(), GCTX.self_addr(), *GCTX.sql_proxy_, index_key_buffer, report_ret_code))) { LOG_WARN("generate index ddl error message", K(tmp_ret), K(ret), K(report_ret_code)); } if (OB_ERR_DUPLICATED_UNIQUE_KEY == report_ret_code) { diff --git a/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp b/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp index 8fb6f6c7da..924ebf7535 100644 --- a/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp +++ b/src/storage/ddl/ob_tablet_ddl_kv_mgr.cpp @@ -227,7 +227,7 @@ int ObTabletDDLKvMgr::ddl_commit(ObTablet &tablet, const SCN &start_scn, const S "wait_elpased_s", (ObTimeUtility::fast_current_time() - start_ts) / 1000000L); } } else { - LOG_INFO("schedule ddl commit task success", K(start_scn), K(commit_scn), K(*this)); + LOG_INFO("schedule ddl commit task success", K(start_scn), K(commit_scn), K(*this), "ddl_event_info", ObDDLEventInfo()); } } return ret; @@ -243,7 +243,7 @@ int ObTabletDDLKvMgr::schedule_ddl_dump_task(ObTablet &tablet, const SCN &start_ param.is_commit_ = false; param.start_scn_ = start_scn; param.compat_mode_ = tablet.get_tablet_meta().compat_mode_; - LOG_INFO("schedule ddl dump task", K(param)); + LOG_INFO("schedule ddl dump task", K(param), "ddl_event_info", ObDDLEventInfo()); if (OB_UNLIKELY(tablet.get_tablet_meta().tablet_id_ != tablet_id_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("tablet id mismatched", K(ret), K(tablet), KPC(this)); @@ -300,6 +300,15 @@ int ObTabletDDLKvMgr::schedule_ddl_merge_task(ObTablet &tablet, const SCN &start } } else { ret = OB_EAGAIN; // until major sstable is ready +#ifdef ERRSIM + if (GCONF.errsim_ddl_major_delay_time.get() > 0) { + ObLatchRGuard guard(lock_, ObLatchIds::TABLET_DDL_KV_MGR_LOCK); + if (commit_scn_.is_valid_and_not_min()) { + ret = OB_SUCCESS; + FLOG_INFO("assume ddl success for delay schedule ddl merge task", K(ret), KPC(this)); + 
} + } +#endif } } return ret; @@ -327,7 +336,11 @@ int ObTabletDDLKvMgr::wait_ddl_merge_success(ObTablet &tablet, const SCN &start_ LOG_WARN("check status failed", K(ret)); } else if (OB_FAIL(schedule_ddl_merge_task(tablet, start_scn, commit_scn))) { if (OB_EAGAIN == ret) { +#ifdef ERRSIM + ob_usleep(1000L * 1000L); // 1s +#else ob_usleep(100L); // 100us. +#endif ret = OB_SUCCESS; // retry } else { LOG_WARN("commit ddl log failed", K(ret), K(start_scn), K(commit_scn), K(ls_id_), K(tablet_id_)); @@ -352,7 +365,11 @@ int ObTabletDDLKvMgr::get_ddl_major_merge_param(ObTablet &tablet, ObDDLTableMerg LOG_WARN("failed to get ddl kv mgr", K(ret)); } else if (OB_FAIL(rdlock(TRY_LOCK_TIMEOUT, lock_tid))) { LOG_WARN("failed to rdlock", K(ret), KPC(this)); - } else if (can_schedule_major_compaction_nolock(tablet.get_tablet_meta())) { + } else if (can_schedule_major_compaction_nolock(tablet.get_tablet_meta()) +#ifdef ERRSIM + && ObTimeUtility::current_time() - get_commit_scn(tablet.get_tablet_meta()).convert_to_ts() > GCONF.errsim_ddl_major_delay_time +#endif + ) { param.ls_id_ = ls_id_; param.tablet_id_ = tablet_id_; param.rec_scn_ = get_commit_scn(tablet.get_tablet_meta()); diff --git a/src/storage/ls/ob_ls_ddl_log_handler.cpp b/src/storage/ls/ob_ls_ddl_log_handler.cpp index 4de17cbe8e..3b2e751926 100644 --- a/src/storage/ls/ob_ls_ddl_log_handler.cpp +++ b/src/storage/ls/ob_ls_ddl_log_handler.cpp @@ -22,6 +22,7 @@ #include "storage/ddl/ob_ddl_replay_executor.h" #include "logservice/ob_log_base_header.h" #include "share/scn.h" +#include "observer/ob_server_event_history_table_operator.h" namespace oceanbase { @@ -202,7 +203,9 @@ int ObLSDDLLogHandler::offline() TCWLockGuard guard(online_lock_); is_online_ = false; } - FLOG_INFO("ddl log hanlder offline", K(ret), "ls_meta", ls_->get_ls_meta()); + + add_ddl_event(ret, "ddl log hanlder offline"); + FLOG_INFO("ddl log hanlder offline", K(ret), "ls_meta", ls_->get_ls_meta(), "ddl_event_info", ObDDLEventInfo()); return OB_SUCCESS; } 
@@ -239,7 +242,8 @@ int ObLSDDLLogHandler::online()
     TCWLockGuard guard(online_lock_);
     is_online_ = true;
   }
-  FLOG_INFO("ddl log hanlder online", K(ret), "ls_meta", ls_->get_ls_meta());
+  add_ddl_event(ret, "ddl log hanlder online");
+  FLOG_INFO("ddl log hanlder online", K(ret), "ls_meta", ls_->get_ls_meta(), "ddl_event_info", ObDDLEventInfo());
   return ret;
 }
 
@@ -499,6 +503,27 @@ int ObLSDDLLogHandler::replay_ddl_start_log_(const char *log_buf,
   return ret;
 }
 
+// Records a "ddl" server event whose event name is ddl_event_stmt, tagged with
+// the tenant id (MTL_ID()), the caller-supplied ret, the current trace id and
+// last_rec_scn_.
+// NOTE(review): ddl_event_stmt.ptr() is handed over as a raw pointer; the callers
+// in this file pass string literals -- confirm any new caller does the same.
+void ObLSDDLLogHandler::add_ddl_event(const int ret, const ObString &ddl_event_stmt)
+{
+  const ObCurTraceId::TraceId *trace_id = ObCurTraceId::get_trace_id();
+  if (NULL == trace_id) {
+    // ObCurTraceId's own helpers NULL-check get_trace_id(), so it must not be
+    // dereferenced blindly here; skip the event rather than crash.
+    LOG_WARN("trace id is null, skip adding ddl event", K(ret), K(ddl_event_stmt));
+  } else {
+    SERVER_EVENT_ADD("ddl", ddl_event_stmt.ptr(),
+      "tenant_id", MTL_ID(),
+      "ret", ret,
+      "trace_id", *trace_id,
+      "last_rec_scn", last_rec_scn_);
+  }
+}
+
 int ObLSDDLLogHandler::add_tablet(const ObTabletID &tablet_id)
 {
   int ret = OB_SUCCESS;
diff --git a/src/storage/ls/ob_ls_ddl_log_handler.h b/src/storage/ls/ob_ls_ddl_log_handler.h
index 9f008bc9be..d52256c491 100644
--- a/src/storage/ls/ob_ls_ddl_log_handler.h
+++ b/src/storage/ls/ob_ls_ddl_log_handler.h
@@ -97,6 +97,7 @@ private:
   int replay_ddl_commit_log_(const char *log_buf, const int64_t buf_size, int64_t pos, const share::SCN &scn);
   int replay_ddl_tablet_schema_version_change_log_(const char *log_buf, const int64_t buf_size, int64_t pos, const share::SCN &scn);
   int replay_ddl_start_log_(const char *log_buf, const int64_t buf_size, int64_t pos, const share::SCN &scn);
+  void add_ddl_event(const int ret, const ObString &ddl_event_stmt);
 private:
   bool is_inited_;
   bool is_online_;
diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result
index bdfe957913..22ce050912 100644
--- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result
+++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/desc_virtual_table_in_sys.result
@@ -1929,6 +1929,29 @@ IF(count(*) >= 0, 1, 0)
"oceanbase.__all_virtual_tablet_stat runs in single server" IF(count(*) >= 0, 1, 0) 1 +desc oceanbase.__all_virtual_ddl_sim_point; +Field Type Null Key Default Extra +sim_point_id bigint(20) NO NULL +sim_point_name varchar(1024) NO NULL +sim_point_description varchar(256) NO NULL +sim_point_action varchar(256) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_ddl_sim_point; +IF(count(*) >= 0, 1, 0) +1 +desc oceanbase.__all_virtual_ddl_sim_point_stat; +Field Type Null Key Default Extra +svr_ip varchar(46) NO NULL +svr_port bigint(20) NO NULL +tenant_id bigint(20) NO NULL +ddl_task_id bigint(20) NO NULL +sim_point_id bigint(20) NO NULL +trigger_count bigint(20) NO NULL +select /*+QUERY_TIMEOUT(60000000)*/ IF(count(*) >= 0, 1, 0) from oceanbase.__all_virtual_ddl_sim_point_stat; +IF(count(*) >= 0, 1, 0) +1 +"oceanbase.__all_virtual_ddl_sim_point_stat runs in single server" +IF(count(*) >= 0, 1, 0) +1 desc information_schema.SESSION_VARIABLES; Field Type Null Key Default Extra VARIABLE_NAME varchar(128) NO diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index ff84f39591..93fa58170e 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -353,6 +353,8 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 11115 __all_virtual_io_quota 2 201001 1 11116 __all_virtual_server_compaction_event_history 2 201001 1 11117 __all_virtual_tablet_stat 2 201001 1 +11118 __all_virtual_ddl_sim_point 2 201001 1 +11119 __all_virtual_ddl_sim_point_stat 2 201001 1 12001 SESSION_VARIABLES 2 201002 1 12006 GLOBAL_STATUS 2 201002 1 12008 SESSION_STATUS 2 201002 1