From 29704755399456563601a089c59e76e6a53caa30 Mon Sep 17 00:00:00 2001 From: SanmuWangZJU Date: Mon, 18 Sep 2023 03:44:04 +0000 Subject: [PATCH] [OBCDC] Fix memory not controlled caused by redo dispatch not under controll --- deps/oblib/src/lib/allocator/ob_mod_define.h | 8 +- src/logservice/libobcdc/src/CMakeLists.txt | 1 + .../libobcdc/src/ob_cdc_auto_config_mgr.cpp | 139 ++++++++++ .../libobcdc/src/ob_cdc_auto_config_mgr.h | 94 +++++++ .../src/ob_cdc_lob_aux_meta_storager.cpp | 28 +- .../libobcdc/src/ob_cdc_macro_utils.h | 47 ++++ .../src/ob_log_binlog_record_queue.cpp | 2 +- src/logservice/libobcdc/src/ob_log_config.cpp | 22 +- src/logservice/libobcdc/src/ob_log_config.h | 39 +-- .../libobcdc/src/ob_log_ddl_parser.cpp | 2 +- .../libobcdc/src/ob_log_dml_parser.cpp | 2 +- .../libobcdc/src/ob_log_fetch_log_rpc.cpp | 32 ++- .../libobcdc/src/ob_log_fetch_log_rpc.h | 1 + .../libobcdc/src/ob_log_fetcher.cpp | 4 +- .../libobcdc/src/ob_log_fetcher_dead_pool.cpp | 4 +- .../libobcdc/src/ob_log_fetcher_idle_pool.cpp | 12 +- .../libobcdc/src/ob_log_formatter.cpp | 66 ++--- .../libobcdc/src/ob_log_instance.cpp | 246 ++++++++++++++---- src/logservice/libobcdc/src/ob_log_instance.h | 14 +- .../libobcdc/src/ob_log_ls_fetch_stream.cpp | 20 +- .../libobcdc/src/ob_log_ls_fetch_stream.h | 3 + .../libobcdc/src/ob_log_meta_data_fetcher.cpp | 1 - .../libobcdc/src/ob_log_mysql_connector.h | 1 - .../libobcdc/src/ob_log_mysql_proxy.cpp | 4 +- .../libobcdc/src/ob_log_part_trans_task.cpp | 2 +- src/logservice/libobcdc/src/ob_log_reader.cpp | 6 +- .../src/ob_log_resource_collector.cpp | 14 +- .../src/ob_log_resource_recycle_task.h | 10 +- .../src/ob_log_rocksdb_store_service.cpp | 8 +- .../libobcdc/src/ob_log_schema_getter.cpp | 4 + .../libobcdc/src/ob_log_sequencer1.cpp | 195 ++++++++++---- .../libobcdc/src/ob_log_sequencer1.h | 24 +- .../libobcdc/src/ob_log_storager.cpp | 14 +- .../libobcdc/src/ob_log_systable_helper.cpp | 7 +- .../libobcdc/src/ob_log_task_pool.h | 3 +- 
.../src/ob_log_timezone_info_getter.cpp | 7 +- .../src/ob_log_trans_dispatch_ctx.cpp | 34 ++- .../libobcdc/src/ob_log_trans_dispatch_ctx.h | 3 + .../libobcdc/src/ob_log_trans_msg_sorter.cpp | 4 +- .../src/ob_log_trans_redo_dispatcher.cpp | 9 +- src/logservice/libobcdc/src/ob_ls_worker.cpp | 17 +- .../libobcdc/src/ob_map_queue_thread.h | 16 +- .../libobcdc/tests/demo/obcdc_demo.cpp | 26 +- .../libobcdc/tests/ob_binlog_record_printer.h | 3 + src/logservice/libobcdc/tests/obcdc_main.cpp | 40 ++- src/logservice/libobcdc/tests/obcdc_main.h | 1 + .../logfetcher/ob_log_fetch_log_rpc.h | 1 + .../logfetcher/ob_log_ls_fetch_stream.h | 3 + .../logfetcher/ob_log_start_lsn_locator.cpp | 20 +- src/logservice/logfetcher/ob_ls_worker.cpp | 5 +- .../logfetcher/ob_map_queue_thread.h | 14 +- src/share/config/ob_common_config.cpp | 4 +- src/share/config/ob_common_config.h | 1 + unittest/libobcdc/test_log_task_pool.cpp | 6 +- .../test_ob_cdc_part_trans_resolver.cpp | 21 +- 55 files changed, 1004 insertions(+), 310 deletions(-) create mode 100644 src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.cpp create mode 100644 src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.h create mode 100644 src/logservice/libobcdc/src/ob_cdc_macro_utils.h diff --git a/deps/oblib/src/lib/allocator/ob_mod_define.h b/deps/oblib/src/lib/allocator/ob_mod_define.h index 63b59ae265..3ffd47ba39 100644 --- a/deps/oblib/src/lib/allocator/ob_mod_define.h +++ b/deps/oblib/src/lib/allocator/ob_mod_define.h @@ -401,10 +401,10 @@ LABEL_ITEM_DEF(OB_LOG_FETCH_STREAM_POOL, LogFetchStrePoo) LABEL_ITEM_DEF(OB_LOG_TIMER, LogTimer) LABEL_ITEM_DEF(OB_LOG_PART_PROGRESS_CONTROLLER, LogPartProgrCon) LABEL_ITEM_DEF(OB_LOG_PART_TRANS_RESOLVER, LogPartTransRes) -LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_RES_QUEUE, LogFetLogArpReQ) -LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_RESULT, FetchLogArpcRes) -LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_REQUEST, LogFetcLogArpRe) -LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_SRPC, LogFetchLogSrpc) 
+LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_RES_QUEUE, LogFetcArpcResQ) +LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_RESULT, LogFetcArpcRes) +LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_ARPC_REQUEST, LogFetcArpcReq) +LABEL_ITEM_DEF(OB_LOG_FETCH_LOG_SRPC, LogFetchSrpc) LABEL_ITEM_DEF(OB_LOG_CONFIG, LogConfig) LABEL_ITEM_DEF(OB_LOG_COMMITTER_CHECKPOINT_QUEUE, LogCommiChecQue) LABEL_ITEM_DEF(OB_LOG_DML_PARSER, LogDmlParser) diff --git a/src/logservice/libobcdc/src/CMakeLists.txt b/src/logservice/libobcdc/src/CMakeLists.txt index 277cba3926..6de5640d3b 100644 --- a/src/logservice/libobcdc/src/CMakeLists.txt +++ b/src/logservice/libobcdc/src/CMakeLists.txt @@ -22,6 +22,7 @@ target_link_libraries(obcdc_base ob_set_subtarget(obcdc_object_list common libobcdc.cpp + ob_cdc_auto_config_mgr.cpp ob_cdc_define.cpp ob_cdc_tablet_to_table_info.cpp ob_cdc_lob_ctx.cpp diff --git a/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.cpp b/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.cpp new file mode 100644 index 0000000000..81eadae96a --- /dev/null +++ b/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.cpp @@ -0,0 +1,139 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX OBLOG + +#include "ob_cdc_auto_config_mgr.h" +#include "ob_log_config.h" // TCONF +#include "ob_log_utils.h" + +#define REFRESH_NUM_FIELD_DIRECT(FIELD_NAME, FIELD_VALUE) \ + do { \ + set_##FIELD_NAME(FIELD_VALUE); \ + LOG_INFO("[AUTO_CONFIG]", K_(FIELD_NAME)); \ + } while (0) + +#define REFRESH_NUM_FIELD_WITH_CONFIG(FIELD_NAME, FIELD_VALUE, CONFIG_VALUE) \ + do { \ + if (CONFIG_VALUE > 0) { \ + set_##FIELD_NAME(CONFIG_VALUE); \ + LOG_INFO("[AUTO_CONFIG][USER_CONFIG]", K_(FIELD_NAME)); \ + } else { \ + set_##FIELD_NAME(FIELD_VALUE); \ + LOG_INFO("[AUTO_CONFIG][AUTO]", K_(FIELD_NAME)); \ + } \ + } while (0) + + +namespace oceanbase +{ +using namespace oceanbase::common; +namespace libobcdc +{ + +// max queue length is 10W +const int64_t ObCDCAutoConfigMgr::MAX_QUEUE_LENGTH = 100000; + +ObCDCAutoConfigMgr &ObCDCAutoConfigMgr::get_instance() +{ + static ObCDCAutoConfigMgr instance; + return instance; +} + +void ObCDCAutoConfigMgr::reset() +{ +} + +void ObCDCAutoConfigMgr::init(const ObLogConfig &config) +{ + init_queue_length_(config); + refresh_dynamic_config_(config); + LOG_INFO("init ObCDCAutoConfigMgr succ"); +} + +void ObCDCAutoConfigMgr::configure(const ObLogConfig &config) +{ + refresh_dynamic_config_(config); +} + +void ObCDCAutoConfigMgr::refresh_factor_(const ObLogConfig &config) +{ + REFRESH_NUM_FIELD_DIRECT(memory_limit, config.memory_limit.get()); + factor_ = get_log2_(memory_limit_) - 20; + if (factor_ < 11) factor_ = 11; + _LOG_INFO("[AUTO_CONFIG][MEMORY_LIMIT: %s(%ld)][FACTOR: %ld]", + SIZE_TO_STR(memory_limit_), memory_limit_, factor_); +} + +void ObCDCAutoConfigMgr::init_queue_length_(const ObLogConfig &config) +{ + refresh_factor_(config); + const static int64_t DEFAULT_STORAGE_QUEUE_LENGTH = 1024; + int64_t auto_queue_length = 1 << (factor_ - 3); + if (auto_queue_length > MAX_QUEUE_LENGTH) auto_queue_length = MAX_QUEUE_LENGTH; + const int64_t br_queue_length = std::min((auto_queue_length * 32), MAX_QUEUE_LENGTH); + 
REFRESH_NUM_FIELD_DIRECT(auto_queue_length, auto_queue_length); + REFRESH_NUM_FIELD_WITH_CONFIG(br_queue_length, br_queue_length, config.br_queue_length.get()); + const int64_t resource_collector_queue_length = br_queue_length_; + REFRESH_NUM_FIELD_DIRECT(resource_collector_queue_length, resource_collector_queue_length); + REFRESH_NUM_FIELD_DIRECT(formatter_queue_length, br_queue_length_); + REFRESH_NUM_FIELD_DIRECT(dml_parser_queue_length, br_queue_length_); + REFRESH_NUM_FIELD_DIRECT(lob_data_merger_queue_length, br_queue_length_); + + const int64_t msg_sorter_queue_length = br_queue_length; + REFRESH_NUM_FIELD_WITH_CONFIG(msg_sorter_task_count_upper_limit, msg_sorter_queue_length, config.msg_sorter_task_count_upper_limit.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(sequencer_queue_length, MAX_QUEUE_LENGTH, config.sequencer_queue_length.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(storager_queue_length, DEFAULT_STORAGE_QUEUE_LENGTH, config.storager_queue_length.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(reader_queue_length, DEFAULT_STORAGE_QUEUE_LENGTH, config.reader_queue_length.get()); +} + +void ObCDCAutoConfigMgr::refresh_dynamic_config_(const ObLogConfig &config) +{ + refresh_factor_(config); + const static int64_t DEFAULT_STORAGER_MEM_PERCENT = 1; + const static int64_t DEFAULT_STORAGER_TASK_UPPER_BOUND = 100; + const int64_t redo_dispatcher_limit = (1 << (factor_ - 11)) * 32 * _M_; + const int64_t auto_part_trans_task_upper_bound = 2000 * (factor_ - 1); + const int64_t active_part_trans_task_upper_bound = auto_part_trans_task_upper_bound; + const int64_t reusable_part_trans_task_upper_bound = auto_part_trans_task_upper_bound; + const int64_t ready_to_seq_task_upper_bound = auto_part_trans_task_upper_bound; + const int64_t extra_redo_dispatch_memory_size = 1 * _K_ + (1 << (factor_ - 11)) * (factor_ - 11) * _M_; + const int64_t redo_dispatch_exceed_ratio = factor_ <= 12 ? 
1 : (1 << (factor_ - 13)); + + REFRESH_NUM_FIELD_WITH_CONFIG(redo_dispatcher_memory_limit, redo_dispatcher_limit, config.redo_dispatcher_memory_limit.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(extra_redo_dispatch_memory_size, extra_redo_dispatch_memory_size, config.extra_redo_dispatch_memory_size.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(redo_dispatched_memory_limit_exceed_ratio, redo_dispatch_exceed_ratio, config.redo_dispatched_memory_limit_exceed_ratio.get()); + _LOG_INFO("[AUTO_CONFIG][REDO_DISPATCH_MEMORY_LIMIT: %s(%ld)}][EXTRA_REDO_FOR_SKEW_PART: %s(%ld)]", + SIZE_TO_STR(redo_dispatcher_memory_limit_), redo_dispatcher_memory_limit_, + SIZE_TO_STR(extra_redo_dispatch_memory_size_), extra_redo_dispatch_memory_size_); + REFRESH_NUM_FIELD_WITH_CONFIG(part_trans_task_active_count_upper_bound, active_part_trans_task_upper_bound, config.part_trans_task_active_count_upper_bound.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(part_trans_task_reusable_count_upper_bound, reusable_part_trans_task_upper_bound, config.part_trans_task_reusable_count_upper_bound.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(ready_to_seq_task_upper_bound, ready_to_seq_task_upper_bound, config.ready_to_seq_task_upper_bound.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(storager_task_count_upper_bound, DEFAULT_STORAGER_TASK_UPPER_BOUND, config.storager_task_count_upper_bound.get()); + REFRESH_NUM_FIELD_WITH_CONFIG(storager_mem_percentage, DEFAULT_STORAGER_MEM_PERCENT, config.storager_mem_percentage.get()); + +} + +int64_t ObCDCAutoConfigMgr::get_log2_(int64_t value) +{ + int64_t res = 0; + + if (value > 0) { + // will modify value, but should not affect invoker + while (value >>= 1) ++res; + } + + return res; +} + +} // namespace libobcdc +} // namespace oceanbase diff --git a/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.h b/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.h new file mode 100644 index 0000000000..77a6afb6a5 --- /dev/null +++ b/src/logservice/libobcdc/src/ob_cdc_auto_config_mgr.h @@ -0,0 +1,94 @@ 
+/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#ifndef OCEANBASE_LIBOBCDC_AUTO_CONFIG_MGR_H_ +#define OCEANBASE_LIBOBCDC_AUTO_CONFIG_MGR_H_ + +#include "ob_cdc_macro_utils.h" +#include "lib/utility/ob_macro_utils.h" + +namespace oceanbase +{ +namespace libobcdc +{ +class ObLogConfig; +class ObCDCAutoConfigMgr +{ +public: + static ObCDCAutoConfigMgr &get_instance(); + ~ObCDCAutoConfigMgr() { reset(); } +public: + void init(const ObLogConfig &config); + void reset(); + void configure(const ObLogConfig &config); +public: + +private: + ObCDCAutoConfigMgr() { reset(); } +private: + void refresh_factor_(const ObLogConfig &config); + void init_queue_length_(const ObLogConfig &config); + void refresh_dynamic_config_(const ObLogConfig &config); + int64_t get_log2_(int64_t value); +private: + static const int64_t MAX_QUEUE_LENGTH; + + +// FIELD DEFINE BEGIN // +private: +// The automatic adjustment values of some parameters in adaptive mode correspond to memory_limit +// +// | memory_limit | 2G | 4G | 8G | 16G | 32G | 128G | +// | --------------------------------- | ----- | ----- | ----- | ----- | ----- | ----- | +// | factor | 11 | 12 | 13 | 14 | 15 | 17 | +// | auto_queue_length | 256 | 512 | 1024 | 2048 | 4096 | 16384 | +// | br_queue_length | 8192 | 16384 | 32768 | 65536 | 10W | 10W | +// | auto_part_trans_task_upper_bound | 20K | 22K | 24K | 26K | 28K | 32K | +// | redo_dispatcher_memory_limit | 32M | 64M | 128M | 256M | 512M | 2G | +// | extra_redo_dispatch_memory_size | 1K | 2M | 8M | 
24M | 64M | 384M | +// | redo_dispatch_exceed_ratio | 1 | 1 | 1 | 2 | 4 | 16 | + int64_t factor_; +DEFINE_FIELD_WITH_GETTER(int64_t, br_queue_length); + +// thread queue length +DEFINE_FIELD_WITH_GETTER(int64_t, auto_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, sequencer_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, storager_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, reader_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, lob_data_merger_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, msg_sorter_task_count_upper_limit); +DEFINE_FIELD_WITH_GETTER(int64_t, resource_collector_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, formatter_queue_length); +DEFINE_FIELD_WITH_GETTER(int64_t, dml_parser_queue_length); + +// flow control +DEFINE_FIELD_WITH_GETTER(int64_t, memory_limit); +DEFINE_FIELD_WITH_GETTER(int64_t, redo_dispatcher_memory_limit); +DEFINE_FIELD_WITH_GETTER(int64_t, extra_redo_dispatch_memory_size); +DEFINE_FIELD_WITH_GETTER(int64_t, redo_dispatched_memory_limit_exceed_ratio); +DEFINE_FIELD_WITH_GETTER(int64_t, part_trans_task_active_count_upper_bound); +DEFINE_FIELD_WITH_GETTER(int64_t, part_trans_task_reusable_count_upper_bound); +DEFINE_FIELD_WITH_GETTER(int64_t, ready_to_seq_task_upper_bound); +DEFINE_FIELD_WITH_GETTER(int64_t, storager_task_count_upper_bound); +DEFINE_FIELD_WITH_GETTER(int64_t, storager_mem_percentage); +// FIELD DEFINE END // + +DISABLE_COPY_ASSIGN(ObCDCAutoConfigMgr); + +}; + +#define CDC_CFG_MGR (ObCDCAutoConfigMgr::get_instance()) + +} // namespace libobcdc +} // namespace oceanbase + +#endif // OCEANBASE_LIBOBCDC_AUTO_CONFIG_MGR_H_ diff --git a/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp b/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp index 9d3846960c..4d55f796a2 100644 --- a/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp +++ b/src/logservice/libobcdc/src/ob_cdc_lob_aux_meta_storager.cpp @@ -197,9 +197,9 @@ int ObCDCLobAuxMetaStorager::memory_put_( 
memcpy(alloc_lob_data, lob_data, lob_data_len); LobAuxMetaValue value(static_cast(alloc_lob_data), lob_data_len); if (OB_FAIL(lob_aux_meta_map_.insert(key, value))) { - LOG_ERROR("lob_aux_meta_map_ insert failed", KR(ret), KCSTRING(key_type), K(key), K(lob_data), K(lob_data_len)); + LOG_ERROR("[OBCDC][LOB_AUX][PUT][MEM] lob_aux_meta_map_ insert failed", KR(ret), KCSTRING(key_type), K(key), K(lob_data), K(lob_data_len)); } else { - LOG_DEBUG("lob_aux_meta_map_ insert succ", KCSTRING(key_type), K(key), K(value)); + LOG_DEBUG("[OBCDC][LOB_AUX][PUT][MEM] lob_aux_meta_map_ insert succ", KCSTRING(key_type), K(key), K(value)); } } return ret; @@ -220,10 +220,10 @@ int ObCDCLobAuxMetaStorager::disk_put_( LOG_ERROR("get key_str fail", KR(ret), KCSTRING(key_type), K(key)); } else if (OB_FAIL(store_service_->put(cf_handle, key_str, ObSlice(lob_data, lob_data_len)))) { if (OB_IN_STOP_STATE != ret) { - LOG_ERROR("store_service_ put fail", KR(ret), KCSTRING(key_type), K(key), KCSTRING(key_str.c_str()), K(lob_data_len)); + LOG_ERROR("[OBCDC][LOB_AUX][PUT][DISK] store_service_ put fail", KR(ret), KCSTRING(key_type), K(key), KCSTRING(key_str.c_str()), K(lob_data_len)); } } else { - LOG_DEBUG("store_service_ insert succ", KCSTRING(key_type), K(key), KCSTRING(key_str.c_str()), K(lob_data_len)); + LOG_DEBUG("[OBCDC][LOB_AUX][PUT][DISK] store_service_ insert succ", KCSTRING(key_type), K(key), KCSTRING(key_str.c_str()), K(lob_data_len)); } return ret; } @@ -267,9 +267,9 @@ int ObCDCLobAuxMetaStorager::memory_get_( LobAuxMetaValue value; if (OB_FAIL(lob_aux_meta_map_.get(key, value))) { if (OB_ENTRY_NOT_EXIST != ret) { - LOG_ERROR("lob_aux_meta_map_ get failed", KR(ret), K(key)); + LOG_ERROR("[OBCDC][LOB_AUX][GET][MEM] lob_aux_meta_map_ get failed", KR(ret), K(key)); } else if (REACH_TIME_INTERVAL(10 * _SEC_)) { - LOG_WARN("lob_aux_meta_map_ get not exist, need retry", KR(ret), K(key)); + LOG_WARN("[OBCDC][LOB_AUX][GET][MEM] lob_aux_meta_map_ get not exist, need retry", KR(ret), K(key)); 
} } else { lob_data = value.lob_data_; @@ -295,9 +295,9 @@ int ObCDCLobAuxMetaStorager::disk_get_( LOG_ERROR("get key_str fail", KR(ret), K(key)); } else if (OB_FAIL(store_service_->get(cf_handle, key_str, value))) { if (OB_ENTRY_NOT_EXIST != ret) { - LOG_ERROR("get failed", KR(ret), K(key)); + LOG_ERROR("[OBCDC][LOB_AUX][GET][DISK] get failed", KR(ret), K(key)); } else if (REACH_TIME_INTERVAL(10 * _SEC_)) { - LOG_WARN("get not exist, need retry", KR(ret), K(key)); + LOG_WARN("[OBCDC][LOB_AUX][GET][DISK] get not exist, need retry", KR(ret), K(key)); } } else if (value.empty()) { ret = OB_INVALID_ARGUMENT; @@ -336,7 +336,7 @@ int ObCDCLobAuxMetaStorager::del( while (OB_SUCC(ret) && ! stop_flag && cur_lob_data_get_ctx) { if (OB_FAIL(del_lob_col_value_(commit_version, tenant_id, trans_id, aux_lob_meta_tid, *cur_lob_data_get_ctx, stop_flag))) { - LOG_ERROR("del_lob_col_value_ failed", KR(ret), K(tenant_id), K(trans_id), K(aux_lob_meta_tid)); + LOG_ERROR("[OBCDC][LOB_AUX][DEL][COL] del_lob_col_value_ failed", KR(ret), K(tenant_id), K(trans_id), K(aux_lob_meta_tid)); } else { cur_lob_data_get_ctx = cur_lob_data_get_ctx->get_next(); } @@ -473,11 +473,11 @@ int ObCDCLobAuxMetaStorager::memory_del_(const uint64_t tenant_id, const int64_t int ret = OB_SUCCESS; LobAuxMetaDataPurger lob_aux_meata_purger(*this, tenant_id, commit_version); if (OB_FAIL(lob_aux_meta_map_.remove_if(lob_aux_meata_purger))) { - LOG_ERROR("lob_aux_meta_map_ remove_if failed", KR(ret), K(tenant_id), K(commit_version)); + LOG_ERROR("[OBCDC][LOB_AUX][CLEAN_TASK][MEM] lob_aux_meta_map_ remove_if failed", KR(ret), K(tenant_id), K(commit_version)); } else { - LOG_INFO("ObCDCLobAuxMetaStorager del", K(tenant_id), K(commit_version), "purge_count", + LOG_INFO("[OBCDC][LOB_AUX][CLEAN_TASK][MEM] ObCDCLobAuxMetaStorager del", K(tenant_id), K(commit_version), "purge_count", lob_aux_meata_purger.purge_count_, "map_count", lob_aux_meta_map_.count(), - "memory_used", lob_aux_meta_allocator_.allocated()); + 
"memory_used", SIZE_TO_STR(lob_aux_meta_allocator_.allocated())); } return ret; } @@ -494,11 +494,11 @@ int ObCDCLobAuxMetaStorager::disk_del_(void* cf_family, const int64_t commit_ver end_key.append("_}"); if (OB_FAIL(store_service_->del_range(cf_family, begin_key, end_key))) { if (OB_IN_STOP_STATE != ret) { - LOG_ERROR("store_service_ del fail", KR(ret), "begin_key", begin_key.c_str(), + LOG_ERROR("[OBCDC][LOB_AUX][CLEAN_TASK][DISK] store_service_ del fail", KR(ret), "begin_key", begin_key.c_str(), "end_key", end_key.c_str(), K(commit_version)); } } else { - LOG_INFO("store_service_ del_range succ", K(commit_version), KCSTRING(end_key.c_str())); + LOG_INFO("[OBCDC][LOB_AUX][CLEAN_TASK][DISK] store_service_ del_range succ", K(commit_version), KCSTRING(end_key.c_str())); } return ret; } diff --git a/src/logservice/libobcdc/src/ob_cdc_macro_utils.h b/src/logservice/libobcdc/src/ob_cdc_macro_utils.h new file mode 100644 index 0000000000..52e2524530 --- /dev/null +++ b/src/logservice/libobcdc/src/ob_cdc_macro_utils.h @@ -0,0 +1,47 @@ +/** + * Copyright (c) 2023 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef _OB_CDC_MACRO_UTILS_H_ +#define _OB_CDC_MACRO_UTILS_H_ + +#include "lib/atomic/ob_atomic.h" + +namespace oceanbase +{ +namespace libobcdc +{ + +// define private field and corresponding getter and setter method +#define DEFINE_PRIVATE_FIELD(TYPE, FIELD_NAME) \ + private: \ + TYPE FIELD_NAME##_; + +#define DEFINE_PUBLIC_GETTER(TYPE, FIELD_NAME) \ + public: \ + TYPE get_##FIELD_NAME() const { return ATOMIC_LOAD(&FIELD_NAME##_); } + +#define DEFINE_PUBLIC_SETTER(TYPE, FIELD_NAME) \ + public: \ + void set_##FIELD_NAME(const TYPE FIELD_NAME) { ATOMIC_SET(&FIELD_NAME##_, FIELD_NAME); } + + +#define DEFINE_FIELD_WITH_GETTER(TYPE, FIELD_NAME) \ + DEFINE_PRIVATE_FIELD(TYPE, FIELD_NAME); \ + DEFINE_PUBLIC_GETTER(TYPE, FIELD_NAME); \ + DEFINE_PUBLIC_SETTER(TYPE, FIELD_NAME); + +// other macros + +} // namespace libobcdc +} // namespace oceanbase + +#endif diff --git a/src/logservice/libobcdc/src/ob_log_binlog_record_queue.cpp b/src/logservice/libobcdc/src/ob_log_binlog_record_queue.cpp index d66190ffd0..b12bdcabda 100644 --- a/src/logservice/libobcdc/src/ob_log_binlog_record_queue.cpp +++ b/src/logservice/libobcdc/src/ob_log_binlog_record_queue.cpp @@ -202,12 +202,12 @@ int BRQueue::pop_next_br_(ObLogBR *&data, const int64_t timeout) LOG_ERROR("pop data from fixed queue fail", KR(ret)); } } else { + cond_.signal(); bool need_accumulate_stat = false; if (OB_FAIL(do_stat_for_part_trans_task_count_(*data, need_accumulate_stat))) { LOG_ERROR("do_stat_for_part_trans_task_count_ fail", KR(ret), K(need_accumulate_stat)); } - cond_.signal(); } } diff --git a/src/logservice/libobcdc/src/ob_log_config.cpp b/src/logservice/libobcdc/src/ob_log_config.cpp index d97285d008..892740322b 100644 --- a/src/logservice/libobcdc/src/ob_log_config.cpp +++ b/src/logservice/libobcdc/src/ob_log_config.cpp @@ -100,6 +100,20 @@ int ObLogConfig::format_cluster_url() return ret; } +bool ObLogConfig::need_print_config(const std::string& config_key) const +{ + bool need_print = true; + + if 
((0 == config_key.compare("cluster_password")) + || (0 == config_key.compare("tenant_password")) + || (0 == config_key.compare("archive_dest")) + || (0 == config_key.compare("ssl_external_kms_info"))) { + need_print = false; + } + + return need_print; +} + void ObLogConfig::print() const { static const int64_t BUF_SIZE = 1L << 22; @@ -119,9 +133,11 @@ void ObLogConfig::print() const TS_TO_STR(get_timestamp())); for (int64_t index = 0; index < configs.count(); index++) { - (void)databuff_printf(buf, size, pos, "%s [CONFIG] %-45s = %s\n", - TS_TO_STR(get_timestamp()), configs.at(index).key_.c_str(), - configs.at(index).val_.c_str()); + if (need_print_config(configs.at(index).key_)) { + (void)databuff_printf(buf, size, pos, "%s [CONFIG] %-45s = %s\n", + TS_TO_STR(get_timestamp()), configs.at(index).key_.c_str(), + configs.at(index).val_.c_str()); + } } (void)databuff_printf(buf, size, pos, diff --git a/src/logservice/libobcdc/src/ob_log_config.h b/src/logservice/libobcdc/src/ob_log_config.h index b20f28f2a3..7bce1ff6a3 100644 --- a/src/logservice/libobcdc/src/ob_log_config.h +++ b/src/logservice/libobcdc/src/ob_log_config.h @@ -74,6 +74,7 @@ public: static ObLogConfig &get_instance(); public: + virtual bool need_print_config(const std::string& config_key) const override; void print() const; int load_from_map(const ConfigMap& configs, const int64_t version = 0, @@ -100,15 +101,16 @@ public: DEF_INT(dml_parser_thread_num, OB_CLUSTER_PARAMETER, "5", "[1,]", "DML parser thread number"); DEF_INT(ddl_parser_thread_num, OB_CLUSTER_PARAMETER, "1", "[1,]", "DDL parser thread number"); DEF_INT(sequencer_thread_num, OB_CLUSTER_PARAMETER, "5", "[1,]", "sequencer thread number"); - DEF_INT(sequencer_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "sequencer queue length"); + DEF_INT(sequencer_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "sequencer queue length"); DEF_INT(formatter_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "formatter thread number"); 
DEF_INT(lob_data_merger_thread_num, OB_CLUSTER_PARAMETER, "2", "[1,]", "lob data merger thread number"); DEF_CAP(batch_buf_size, OB_CLUSTER_PARAMETER, "20MB", "[2MB,]", "batch buf size"); DEF_INT(batch_buf_count, OB_CLUSTER_PARAMETER, "10", "[5,]", "batch buf count"); DEF_INT(storager_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "storager thread number"); - DEF_INT(storager_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "storager queue length"); + DEF_INT(storager_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "storager queue length"); DEF_INT(reader_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "reader thread number"); - DEF_INT(reader_queue_length, OB_CLUSTER_PARAMETER, "102400", "[1,]", "reader queue length"); + DEF_INT(reader_queue_length, OB_CLUSTER_PARAMETER, "0", "[0,]", "reader queue length"); + DEF_INT(br_queue_length, OB_CLUSTER_PARAMETER, "0", "[0, ]", "user_binlog_record queue length"); DEF_INT(cached_schema_version_count, OB_CLUSTER_PARAMETER, "32", "[1,]", "cached schema version count"); DEF_INT(history_schema_version_count, OB_CLUSTER_PARAMETER, "16", "[1,]", "history schema version count"); DEF_INT(resource_collector_thread_num, OB_CLUSTER_PARAMETER, "10", "[1,]", "resource collector thread number"); @@ -117,19 +119,18 @@ public: DEF_INT(instance_index, OB_CLUSTER_PARAMETER, "0", "[0,]", "store instance index, start from 0"); DEF_INT(part_trans_task_prealloc_count, OB_CLUSTER_PARAMETER, "300000", "[1,]", "part trans task pre-alloc count"); - DEF_INT(part_trans_task_active_count_upper_bound, OB_CLUSTER_PARAMETER, "200000", "[1,]", + DEF_INT(part_trans_task_active_count_upper_bound, OB_CLUSTER_PARAMETER, "0", "[0,]", "active part trans task count upper bound"); - DEF_INT(storager_task_count_upper_bound, OB_CLUSTER_PARAMETER, "1000", "[1,]", + DEF_INT(storager_task_count_upper_bound, OB_CLUSTER_PARAMETER, "0", "[0,]", "storager task count upper bound"); - DEF_INT(storager_mem_percentage, OB_CLUSTER_PARAMETER, "2", "[1,]", + 
DEF_INT(storager_mem_percentage, OB_CLUSTER_PARAMETER, "0", "[0,]", "storager memory percentage"); - T_DEF_BOOL(skip_recycle_data, OB_CLUSTER_PARAMETER, 0, "0:not_skip, 1:skip") - DEF_INT(part_trans_task_reusable_count_upper_bound, OB_CLUSTER_PARAMETER, "10240", "[1,]", + T_DEF_BOOL(skip_recycle_data, OB_CLUSTER_PARAMETER, 0, "0:not_skip, 1:skip"); + DEF_INT(part_trans_task_reusable_count_upper_bound, OB_CLUSTER_PARAMETER, "0", "[0,]", "reusable parti trans task count upper bound"); - DEF_INT(ready_to_seq_task_upper_bound, OB_CLUSTER_PARAMETER, "20000", "[1,]", + DEF_INT(ready_to_seq_task_upper_bound, OB_CLUSTER_PARAMETER, "0", "[0,]", "ready to sequencer task count upper bound"); DEF_INT(part_trans_task_dynamic_alloc, OB_CLUSTER_PARAMETER, "1", "[0,1]", "part trans task dynamic alloc"); - DEF_CAP(part_trans_task_page_size, OB_CLUSTER_PARAMETER, "8KB", "[1B,]", "part trans task page size"); DEF_INT(part_trans_task_prealloc_page_count, OB_CLUSTER_PARAMETER, "20000", "[1,]", "part trans task prealloc page count"); // Log_level=INFO in the startup scenario, and then optimize the schema to WARN afterwards @@ -169,7 +170,7 @@ public: DEF_INT(log_entry_task_prealloc_count, OB_CLUSTER_PARAMETER, "100000", "[1,]", "log entry task pre-alloc count"); - DEF_INT(binlog_record_prealloc_count, OB_CLUSTER_PARAMETER, "100000", "[1,]", "binlog record pre-alloc count"); + DEF_INT(binlog_record_prealloc_count, OB_CLUSTER_PARAMETER, "200000", "[1,]", "binlog record pre-alloc count"); DEF_STR(store_service_path, OB_CLUSTER_PARAMETER, "./storage", "store sevice path"); @@ -310,7 +311,7 @@ public: T_DEF_INT_INFT(fetch_log_rpc_timeout_sec, OB_CLUSTER_PARAMETER, 15, 1, "fetch log rpc timeout in seconds"); // Upper limit of progress difference between partitions, in seconds - T_DEF_INT_INFT(progress_limit_sec_for_dml, OB_CLUSTER_PARAMETER, 300, 1, "dml progress limit in seconds"); + T_DEF_INT_INFT(progress_limit_sec_for_dml, OB_CLUSTER_PARAMETER, 30, 1, "dml progress limit in seconds"); // 
The Sys Tenant is not filtered by default T_DEF_BOOL(enable_filter_sys_tenant, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); @@ -321,7 +322,7 @@ public: // A means of fault tolerance for LDG T_DEF_BOOL(enable_continue_use_cache_server_list, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); - T_DEF_INT_INFT(progress_limit_sec_for_ddl, OB_CLUSTER_PARAMETER, 3600, 1, "ddl progress limit in seconds"); + T_DEF_INT_INFT(progress_limit_sec_for_ddl, OB_CLUSTER_PARAMETER, 60, 1, "ddl progress limit in seconds"); // LS fetch progress update timeout in seconds // If the logs are not fetched after a certain period of time, the stream will be cut @@ -344,6 +345,8 @@ public: // pause fetcher T_DEF_BOOL(pause_fetcher, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); + // pause dispatch redo + T_DEF_BOOL(pause_dispatch_redo, OB_CLUSTER_PARAMETER, 0, "0:disabled, 1:enabled"); // Maximum number of tasks supported by the timer T_DEF_INT_INFT(timer_task_count_upper_limit, OB_CLUSTER_PARAMETER, 1024, 1, "max timer task count"); @@ -403,15 +406,17 @@ public: // Not on by default (participatn-by-participant output) T_DEF_BOOL(enable_output_trans_order_by_sql_operation, OB_CLUSTER_PARAMETER, 1, "0:disabled, 1:enabled"); // redo dispatcher memory limit - DEF_CAP(redo_dispatcher_memory_limit, OB_CLUSTER_PARAMETER, "64M", "[0M,]", "redo dispatcher memory limit"); - DEF_CAP(extra_redo_dispatch_memory_size, OB_CLUSTER_PARAMETER, "1M", "[0, 512M]", "extra redo dispatcher memory for data skew participant"); + DEF_CAP(redo_dispatcher_memory_limit, OB_CLUSTER_PARAMETER, "0M", "[0M,]", "redo dispatcher memory limit"); // redo diepatcher memory limit ratio for output br by sql operation(compare with redo_dispatcher_memory_limit) - T_DEF_INT_INFT(redo_dispatched_memory_limit_exceed_ratio, OB_CLUSTER_PARAMETER, 2, 1, + T_DEF_INT_INFT(redo_dispatched_memory_limit_exceed_ratio, OB_CLUSTER_PARAMETER, 0, 0, "redo_dispatcher_memory_limit ratio for output by sql operation order"); + 
DEF_CAP(extra_redo_dispatch_memory_size, OB_CLUSTER_PARAMETER, "0KB", "[0, 512M]", "extra redo dispatcher memory for data skew participant"); + T_DEF_INT(pause_redo_dispatch_task_count_threshold, OB_CLUSTER_PARAMETER, 80, 0, 100, "task cound percent threshold for pause redo dispatch"); + T_DEF_INT(memory_usage_warn_threshold, OB_CLUSTER_PARAMETER, 85, 10, 100, "memory usage wan threshold, may pause fetch while reach the threshold"); // sorter thread num T_DEF_INT(msg_sorter_thread_num, OB_CLUSTER_PARAMETER, 1, 1, 32, "trans msg sorter thread num"); // sorter thread - T_DEF_INT_INFT(msg_sorter_task_count_upper_limit, OB_CLUSTER_PARAMETER, 200000, 1, "trans msg sorter thread num"); + T_DEF_INT_INFT(msg_sorter_task_count_upper_limit, OB_CLUSTER_PARAMETER, 0, 0, "trans msg sorter task count per thread"); // ------------------------------------------------------------------------ // Test mode, used only in obtest and other test tool scenarios diff --git a/src/logservice/libobcdc/src/ob_log_ddl_parser.cpp b/src/logservice/libobcdc/src/ob_log_ddl_parser.cpp index 71b4809bc6..bff5a0da02 100644 --- a/src/logservice/libobcdc/src/ob_log_ddl_parser.cpp +++ b/src/logservice/libobcdc/src/ob_log_ddl_parser.cpp @@ -124,7 +124,7 @@ int ObLogDdlParser::push(PartTransTask &task, const int64_t timeout) LOG_INFO("DDL parser has been stoped"); ret = OB_IN_STOP_STATE; } else if (OB_FAIL(DdlParserThread::push(&task, push_hash, timeout))) { - if (OB_TIMEOUT != ret) { + if (OB_TIMEOUT != ret && OB_IN_STOP_STATE != ret) { LOG_ERROR("push task into DDL queue thread fail", KR(ret), K(task), K(push_hash)); } } else { diff --git a/src/logservice/libobcdc/src/ob_log_dml_parser.cpp b/src/logservice/libobcdc/src/ob_log_dml_parser.cpp index da5978fd58..d77760ca6f 100644 --- a/src/logservice/libobcdc/src/ob_log_dml_parser.cpp +++ b/src/logservice/libobcdc/src/ob_log_dml_parser.cpp @@ -136,7 +136,7 @@ int ObLogDmlParser::push(ObLogEntryTask &task, const int64_t timeout) const uint64_t hash_value = 
ATOMIC_FAA(&round_value_, 1); if (OB_FAIL(DmlParserThread::push(&task, hash_value, timeout))) { - if (OB_TIMEOUT != ret) { + if (OB_TIMEOUT != ret && OB_IN_STOP_STATE != ret) { LOG_ERROR("push task into DML queue thread fail", KR(ret), K(task)); } } else { diff --git a/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.cpp b/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.cpp index d895637916..a24876dbb8 100644 --- a/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.cpp +++ b/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.cpp @@ -180,7 +180,9 @@ int FetchLogSRpc::RpcCB::process() const common::ObAddr &svr = RpcCBBase::dst_; if (OB_FAIL(do_process_(rcode, &result))) { - LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr)); + } } // Aone: // Note: destruct response after asynchronous RPC processing @@ -200,7 +202,9 @@ void FetchLogSRpc::RpcCB::on_timeout() to_cstring(svr)); if (OB_FAIL(do_process_(rcode, NULL))) { - LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr)); + } } } @@ -217,7 +221,9 @@ void FetchLogSRpc::RpcCB::on_invalid() to_cstring(svr)); if (OB_FAIL(do_process_(rcode, NULL))) { - LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr)); + } } } @@ -711,7 +717,9 @@ int FetchLogARpc::handle_rpc_response(RpcRequest &rpc_req, // Assign log stream fetching tasks as needed if (OB_SUCCESS == ret && need_dispatch_stream_task) { if (OB_FAIL(stream_worker_->dispatch_stream_task(host_, "RpcCallback"))) { - LOG_ERROR("dispatch stream task fail", KR(ret)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch stream task fail", 
KR(ret)); + } } } } @@ -1111,7 +1119,9 @@ int FetchLogARpc::RpcCB::process() const common::ObAddr &svr = RpcCBBase::dst_; if (OB_FAIL(do_process_(rcode, &result))) { - LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr), K_(host)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback fail", KR(ret), K(result), K(rcode), K(svr), K_(host)); + } } // Aone: // Note: Active destructe response after asynchronous RPC processing @@ -1131,7 +1141,9 @@ void FetchLogARpc::RpcCB::on_timeout() to_cstring(svr)); if (OB_FAIL(do_process_(rcode, NULL))) { - LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr), K_(host)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback on timeout fail", KR(ret), K(rcode), K(svr), K_(host)); + } } } @@ -1148,7 +1160,9 @@ void FetchLogARpc::RpcCB::on_invalid() to_cstring(svr)); if (OB_FAIL(do_process_(rcode, NULL))) { - LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr), K_(host)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("process fetch log callback on invalid fail", KR(ret), K(rcode), K(svr), K_(host)); + } } } @@ -1164,7 +1178,9 @@ int FetchLogARpc::RpcCB::do_process_(const ObRpcResultCode &rcode, const ObCdcLS } // Processing RPC response results else if (OB_FAIL(rpc_host.handle_rpc_response(rpc_req, rcode, resp))) { - LOG_ERROR("set fetch log response fail", KR(ret), K(resp), K(rcode), K(rpc_req)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("set fetch log response fail", KR(ret), K(resp), K(rcode), K(rpc_req)); + } } else { // success } diff --git a/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.h b/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.h index 9fe7109516..1b4d86f727 100644 --- a/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.h +++ b/src/logservice/libobcdc/src/ob_log_fetch_log_rpc.h @@ -250,6 +250,7 @@ public: int64_t get_flying_request_count(); void print_flying_request_list(); + bool 
is_rpc_ready() const { return State::READY == state_; } private: int alloc_rpc_request_(const share::ObLSID &ls_id, diff --git a/src/logservice/libobcdc/src/ob_log_fetcher.cpp b/src/logservice/libobcdc/src/ob_log_fetcher.cpp index 5ca931de11..73e8523fe4 100644 --- a/src/logservice/libobcdc/src/ob_log_fetcher.cpp +++ b/src/logservice/libobcdc/src/ob_log_fetcher.cpp @@ -951,13 +951,13 @@ int ObLogFetcher::next_heartbeat_timestamp_(int64_t &heartbeat_tstamp, const int } } else if (OB_UNLIKELY(OB_INVALID_TIMESTAMP == ddl_handle_progress)) { - LOG_ERROR("get DDL handle progress is invalid", K(ddl_handle_progress), K(ddl_handle_lsn)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("get DDL handle progress is invalid", KR(ret), K(ddl_handle_progress), K(ddl_handle_lsn)); } // Then iterate through all the partitions to get the distribution progress of each partition, i.e. the progress of Fetcher's distribution data // Note: Here we also get the progress of the DDL distribution, which is only used for printing else if (OB_FAIL(ls_fetch_mgr_.for_each_ls(hb_func))) { - LOG_ERROR("for each part fetch ctx fail", KR(ret)); + LOG_WARN("for each part fetch ctx fail", KR(ret)); } else { int64_t data_progress = hb_func.data_progress_; // TODO diff --git a/src/logservice/libobcdc/src/ob_log_fetcher_dead_pool.cpp b/src/logservice/libobcdc/src/ob_log_fetcher_dead_pool.cpp index 40caed4375..e228555543 100644 --- a/src/logservice/libobcdc/src/ob_log_fetcher_dead_pool.cpp +++ b/src/logservice/libobcdc/src/ob_log_fetcher_dead_pool.cpp @@ -100,7 +100,9 @@ int ObLogFetcherDeadPool::push(LSFetchCtx *task) LOG_DEBUG("[STAT] [DEAD_POOL] [DISPATCH_IN]", K(task), KPC(task)); if (OB_FAIL(DeadPoolThread::push(task, task->hash()))) { - LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + } } else { // 成功 } diff --git a/src/logservice/libobcdc/src/ob_log_fetcher_idle_pool.cpp 
b/src/logservice/libobcdc/src/ob_log_fetcher_idle_pool.cpp index 5d96242c4d..7f1918341b 100644 --- a/src/logservice/libobcdc/src/ob_log_fetcher_idle_pool.cpp +++ b/src/logservice/libobcdc/src/ob_log_fetcher_idle_pool.cpp @@ -103,7 +103,9 @@ int ObLogFetcherIdlePool::push(LSFetchCtx *task) LOG_DEBUG("[STAT] [IDLE_POOL] [DISPATCH_IN]", K(task), KPC(task)); if (OB_FAIL(IdlePoolThread::push(task, task->hash()))) { - LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push task fail", KR(ret), K(task), K(task->hash())); + } } else { // success } @@ -261,7 +263,9 @@ int ObLogFetcherIdlePool::do_request_(const int64_t thread_index, FetchTaskList const char *dispatch_reason = "SvrListReady"; if (OB_FAIL(stream_worker_->dispatch_fetch_task(*task, dispatch_reason))) { - LOG_ERROR("dispatch fetch task fail", KR(ret), KPC(task), K(dispatch_reason)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch fetch task fail", KR(ret), KPC(task), K(dispatch_reason)); + } } else { // You cannot continue to operate the task afterwards } @@ -300,7 +304,9 @@ int ObLogFetcherIdlePool::handle_task_(LSFetchCtx *task, bool &need_dispatch) // Requires a successful update of the server list before leaving the idle pool if (task->need_update_svr_list()) { if (OB_FAIL(task->update_svr_list())) { - LOG_ERROR("update server list fail", KR(ret), KPC(task)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("update server list fail", KR(ret), KPC(task)); + } } } // locate the start LSN diff --git a/src/logservice/libobcdc/src/ob_log_formatter.cpp b/src/logservice/libobcdc/src/ob_log_formatter.cpp index 3f89963046..66f89b5e6c 100644 --- a/src/logservice/libobcdc/src/ob_log_formatter.cpp +++ b/src/logservice/libobcdc/src/ob_log_formatter.cpp @@ -226,11 +226,11 @@ int ObLogFormatter::push(IStmtTask *stmt_task, volatile bool &stop_flag) int ret = OB_SUCCESS; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(stmt_task)) { - LOG_ERROR("invalid arguments", K(stmt_task)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid arguments", K(stmt_task), KR(ret)); } else { // Ensure that all stmt of ObLogEntryTask are pushed to the same queue const uint64_t hash_value = ATOMIC_FAA(&round_value_, 1); @@ -297,8 +297,8 @@ int ObLogFormatter::get_task_count( stmt_in_lob_merger_count = 0; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("parser has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("parser has not been initialized", KR(ret)); } else if (OB_FAIL(get_total_task_num(br_count))) { LOG_ERROR("get_total_task_num fail", KR(ret), K(br_count)); } else { @@ -424,17 +424,17 @@ int ObLogFormatter::init_binlog_record_for_dml_stmt_task_( PartTransTask *part_trans_task = NULL; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(stmt_task)) { - LOG_ERROR("invalid arguments", K(stmt_task)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid arguments", KR(ret), K(stmt_task)); } else if (OB_ISNULL(log_entry_task = &(stmt_task->get_redo_log_entry_task()))) { - LOG_ERROR("log_entry_task is NULL", KPC(stmt_task)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("log_entry_task is NULL", KR(ret), KPC(stmt_task)); } else if (OB_ISNULL(part_trans_task = static_cast(log_entry_task->get_host()))) { - LOG_ERROR("part_trans_task is NULL", K(log_entry_task)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("part_trans_task is NULL", KR(ret), K(log_entry_task)); } else if (stmt_task->is_callback()) { // Binlog record must have been generated before when is callback br = stmt_task->get_binlog_record(); @@ -443,8 +443,8 @@ int ObLogFormatter::init_binlog_record_for_dml_stmt_task_( if (OB_FAIL(br_pool_->alloc(br, 
log_entry_task, stmt_task))) { LOG_ERROR("alloc binlog record from pool fail", KR(ret), K(stmt_task)); } else if (OB_ISNULL(br)) { - LOG_ERROR("alloc binlog record fail", K(br)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("alloc binlog record fail", KR(ret), K(br)); } else { // select ... for update to record DF_LOCK log to prevent loss of row lock information on the // standby machine in the event of a master/standby switchover, no synchronization required @@ -1063,14 +1063,14 @@ int ObLogFormatter::build_row_value_( ObTimeZoneInfoWrap *tz_info_wrap = nullptr; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(rv) || OB_ISNULL(stmt_task) || OB_ISNULL(simple_table_schema)) { - LOG_ERROR("invalid argument", K(rv), K(stmt_task), K(simple_table_schema)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(rv), K(stmt_task), K(simple_table_schema)); } else if (OB_ISNULL(meta_manager_)) { - LOG_ERROR("meta_manager_ is null", K(meta_manager_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("meta_manager_ is null", KR(ret), K(meta_manager_)); } else if (OB_FAIL(meta_manager_->get_table_schema_meta( simple_table_schema->get_schema_version(), simple_table_schema->get_tenant_id(), @@ -1082,8 +1082,8 @@ int ObLogFormatter::build_row_value_( "table_id", simple_table_schema->get_table_id(), "table_name", simple_table_schema->get_table_name(), KPC(tb_schema_info)); } else if (OB_ISNULL(tb_schema_info)) { - LOG_ERROR("tb_schema_info is null", K(tb_schema_info)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("tb_schema_info is null", KR(ret), K(tb_schema_info)); } else if (OB_ISNULL(tz_info_getter)) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("tz_info_getter is nullptr", KR(ret), K(tz_info_getter)); @@ -1242,14 +1242,14 @@ int ObLogFormatter::fill_normal_cols_( int ret = OB_SUCCESS; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { - LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(rv), K(simple_table_schema)); } else if (OB_ISNULL(meta_manager_)) { - LOG_ERROR("meta_manager_ is null", K(meta_manager_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("meta_manager_ is null", KR(ret), K(meta_manager_)); } else { const int64_t column_count = rv->column_num_; ColValue *cv = cv_list.head_; @@ -1377,14 +1377,14 @@ int ObLogFormatter::fill_rowkey_cols_( int ret = OB_SUCCESS; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { - LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", K(rv), K(simple_table_schema), KR(ret)); } else if (OB_ISNULL(meta_manager_)) { - LOG_ERROR("meta_manager_ is null", K(meta_manager_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("meta_manager_ is null", KR(ret), K_(meta_manager)); } else { ColValue *cv_node = rowkey_cols.head_; int64_t rowkey_count = rowkey_cols.num_; @@ -1396,8 +1396,8 @@ int ObLogFormatter::fill_rowkey_cols_( OB_SUCC(ret) && rowkey_index < rowkey_count; rowkey_index++, cv_node = cv_node->next_) { if (OB_ISNULL(cv_node)) { - LOG_ERROR("column value node is NULL", K(rowkey_index), K(rowkey_count), K(cv_node)); ret = OB_INVALID_DATA; + LOG_ERROR("column value node is NULL", KR(ret), K(rowkey_index), K(rowkey_count), K(cv_node)); } else if (OB_FAIL(tb_schema_info.get_column_schema_info_for_rowkey(rowkey_index, column_schema_info))) { LOG_ERROR("get_column_schema_info_for_rowkey failed", KR(ret), "table_id", 
simple_table_schema->get_table_id(), @@ -1439,14 +1439,14 @@ int ObLogFormatter::fill_orig_default_value_( int ret = OB_SUCCESS; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { - LOG_ERROR("invalid argument", K(rv), K(simple_table_schema)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(rv), K(simple_table_schema)); } else if (OB_ISNULL(meta_manager_)) { - LOG_ERROR("meta_manager_ is null", K(meta_manager_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("meta_manager_ is null", KR(ret), K(meta_manager_)); } else { int64_t column_count = rv->column_num_; int64_t table_schema_version = simple_table_schema->get_schema_version(); @@ -1539,8 +1539,8 @@ int ObLogFormatter::set_src_category_(IBinlogRecord *br_data, int ret = OB_SUCCESS; if (OB_ISNULL(br_data) || OB_ISNULL(rv)) { - LOG_ERROR("invalid argument", K(br_data), K(rv)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(br_data), K(rv)); } else { int src_category = SRC_NO; @@ -1573,18 +1573,18 @@ int ObLogFormatter::build_binlog_record_( const uint64_t table_id = simple_table_schema->get_table_id(); if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(br) || OB_ISNULL(rv) || OB_ISNULL(simple_table_schema)) { - LOG_ERROR("invalid argument", K(br), K(rv), K(simple_table_schema)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(br), K(rv), K(simple_table_schema)); } else if (OB_ISNULL(br_data = br->get_data())) { - LOG_ERROR("binlog record data is invalid", K(br)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("binlog record data is invalid", KR(ret), K(br)); } else if (OB_ISNULL(rv->new_column_array_) || OB_ISNULL(rv->old_column_array_)) { + ret = OB_INVALID_ARGUMENT; LOG_ERROR("invalid row value, new_column_array or old_column_array is invalid", K(rv->new_column_array_), K(rv->old_column_array_)); - ret = OB_INVALID_ARGUMENT; } else if (OB_FAIL(is_hbase_mode_put_(table_id, dml_flag, rv->column_num_, new_column_cnt, rv->contain_old_column_, is_hbase_mode_put))) { LOG_ERROR("is_hbase_mode_put_ fail", KR(ret), K(table_id), K(dml_flag), @@ -1666,8 +1666,8 @@ int ObLogFormatter::is_hbase_mode_put_(const uint64_t table_id, if (enable_hbase_mode_) { if (OB_ISNULL(hbase_util_)) { - LOG_ERROR("hbase_util_ is null", K(hbase_util_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("hbase_util_ is null", KR(ret), K(hbase_util_)); } else if (OB_FAIL(hbase_util_->is_hbase_table(table_id, is_hbase_table))) { LOG_ERROR("ObLogHbaseUtil is_hbase_table fail", KR(ret), K(table_id), K(is_hbase_table)); } else if (is_hbase_table && ObDmlFlag::DF_UPDATE == dml_flag && false == contain_old_column) { @@ -1704,8 +1704,8 @@ int ObLogFormatter::format_dml_delete_(IBinlogRecord *br_data, const RowValue *r int ret = OB_SUCCESS; if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { - LOG_ERROR("invalid argument", K(br_data), K(row_value)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(br_data), K(row_value)); } else { for (int64_t i = 0; OB_SUCCESS 
== ret && i < row_value->column_num_; i++) { // Handling primary key values @@ -1716,8 +1716,8 @@ int ObLogFormatter::format_dml_delete_(IBinlogRecord *br_data, const RowValue *r ObString *str = row_value->new_columns_[i]; if (OB_ISNULL(str)) { - LOG_ERROR("rowkey column is NULL, unexcepted error", K(i), K(row_value->column_num_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("rowkey column is NULL, unexcepted error", KR(ret), K(i), K(row_value->column_num_)); } else { br_data->putOld(str->ptr(), str->length()); } @@ -1759,8 +1759,8 @@ int ObLogFormatter::format_dml_insert_(IBinlogRecord *br_data, const RowValue *r int ret = OB_SUCCESS; if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { - LOG_ERROR("invalid argument", K(br_data), K(row_value)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(br_data), K(row_value)); } else { for (int64_t i = 0; OB_SUCCESS == ret && i < row_value->column_num_; i++) { if (!row_value->is_changed_[i]) { @@ -1797,8 +1797,8 @@ int ObLogFormatter::format_dml_update_(IBinlogRecord *br_data, const RowValue *r int ret = OB_SUCCESS; if (OB_ISNULL(br_data) || OB_ISNULL(row_value)) { - LOG_ERROR("invalid argument", K(br_data), K(row_value)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(br_data), K(row_value)); } else { for (int i = 0; OB_SUCCESS == ret && i < row_value->column_num_; i++) { if (! row_value->is_changed_[i]) { @@ -1889,8 +1889,8 @@ int ObLogFormatter::get_schema_with_online_schema_( int ret = OB_SUCCESS; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogFormatter has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogFormatter has not been initialized", KR(ret)); } else if (OB_ISNULL(schema_getter_) || OB_UNLIKELY(version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("invalid argument", KR(ret), KP_(schema_getter), K(version)); diff --git a/src/logservice/libobcdc/src/ob_log_instance.cpp b/src/logservice/libobcdc/src/ob_log_instance.cpp index 33fbdb707a..a94b5030bf 100644 --- a/src/logservice/libobcdc/src/ob_log_instance.cpp +++ b/src/logservice/libobcdc/src/ob_log_instance.cpp @@ -57,6 +57,7 @@ #include "ob_log_start_schema_matcher.h" // ObLogStartSchemaMatcher #include "ob_log_tenant_mgr.h" // IObLogTenantMgr #include "ob_log_rocksdb_store_service.h" // RocksDbStoreService +#include "ob_cdc_auto_config_mgr.h" // CDC_CFG_MGR #include "ob_log_trace_id.h" #include "share/ob_simple_mem_limit_getter.h" @@ -504,7 +505,7 @@ int ObLogInstance::init_global_kvcache_() } else if (OB_FAIL(lib::ObResourceMgr::get_instance().set_cache_washer(ObKVGlobalCache::get_instance()))) { LOG_ERROR("Fail to set_cache_washer", KR(ret)); } else { - LOG_INFO("ObKVGlobalCache init succ", "max_cached_size", SIZE_TO_STR(DEFAULT_QUEUE_SIZE)); + LOG_INFO("ObKVGlobalCache init succ", "max_cached_size", SIZE_TO_STR(DEFAULT_MAX_CACHE_SIZE)); } return ret; @@ -565,13 +566,12 @@ int ObLogInstance::init_common_(uint64_t start_tstamp_ns, ERROR_CALLBACK err_cb) LOG_ERROR("init fifo allocator fail", KR(ret)); } else if (OB_FAIL(trans_task_pool_.init(&trans_task_pool_alloc_, TCONF.part_trans_task_prealloc_count, - TCONF.part_trans_task_page_size, 1 == TCONF.part_trans_task_dynamic_alloc, TCONF.part_trans_task_prealloc_page_count))) { LOG_ERROR("init task pool fail", KR(ret)); } else if (OB_FAIL(hbase_util_.init())) { LOG_ERROR("init hbase_util_ fail", KR(ret)); - } else if (OB_FAIL(br_queue_.init(DEFAULT_QUEUE_SIZE))) { + } else if (OB_FAIL(br_queue_.init(CDC_CFG_MGR.get_br_queue_length()))) { LOG_ERROR("init 
binlog record queue fail", KR(ret)); } else if (OB_FAIL(init_global_tenant_manager_())) { LOG_ERROR("init_global_tenant_manager_ fail", KR(ret)); @@ -642,6 +642,8 @@ int ObLogInstance::dump_config_() } } + CDC_CFG_MGR.init(TCONF); + return ret; } @@ -727,7 +729,7 @@ int ObLogInstance::init_components_(const uint64_t start_tstamp_ns) ObBackupPathString archive_dest(archive_dest_str); const bool enable_ssl_client_authentication = (1 == TCONF.ssl_client_authentication); const bool enable_sort_by_seq_no = (1 == TCONF.enable_output_trans_order_by_sql_operation); - const int64_t redo_dispatcher_mem_limit = TCONF.redo_dispatcher_memory_limit.get(); + const int64_t redo_dispatcher_mem_limit = CDC_CFG_MGR.get_redo_dispatcher_memory_limit(); enable_filter_sys_tenant_ = (0 != TCONF.enable_filter_sys_tenant); drc_message_factory_binlog_record_type_.assign(drc_message_factory_binlog_record_type_str, @@ -856,7 +858,8 @@ int ObLogInstance::init_components_(const uint64_t start_tstamp_ns) INIT(meta_manager_, ObLogMetaManager, &obj2str_helper_, enable_output_hidden_primary_key); INIT(resource_collector_, ObLogResourceCollector, - TCONF.resource_collector_thread_num, TCONF.resource_collector_thread_num_for_br, DEFAULT_QUEUE_SIZE, + TCONF.resource_collector_thread_num, TCONF.resource_collector_thread_num_for_br, + CDC_CFG_MGR.get_resource_collector_queue_length(), br_pool_, trans_ctx_mgr_, meta_manager_, store_service_, err_handler); INIT(tenant_mgr_, ObLogTenantMgr, enable_oracle_mode_match_case_sensitive, refresh_mode_); @@ -916,25 +919,26 @@ int ObLogInstance::init_components_(const uint64_t start_tstamp_ns) } } - INIT(trans_msg_sorter_, ObLogTransMsgSorter, enable_sort_by_seq_no, TCONF.msg_sorter_thread_num, TCONF.msg_sorter_task_count_upper_limit, - *trans_stat_mgr_, err_handler); + INIT(trans_msg_sorter_, ObLogTransMsgSorter, enable_sort_by_seq_no, TCONF.msg_sorter_thread_num, + CDC_CFG_MGR.get_msg_sorter_task_count_upper_limit(), *trans_stat_mgr_, err_handler); 
INIT(committer_, ObLogCommitter, start_seq, &br_queue_, resource_collector_, br_pool_, trans_ctx_mgr_, trans_stat_mgr_, err_handler); - INIT(storager_, ObLogStorager, TCONF.storager_thread_num, TCONF.storager_queue_length, *store_service_, *err_handler); + INIT(storager_, ObLogStorager, TCONF.storager_thread_num, CDC_CFG_MGR.get_storager_queue_length(), *store_service_, *err_handler); INIT(batch_buffer_, ObLogBatchBuffer, TCONF.batch_buf_size, TCONF.batch_buf_count, storager_); - INIT(reader_, ObLogReader, TCONF.reader_thread_num, TCONF.reader_queue_length, + INIT(reader_, ObLogReader, TCONF.reader_thread_num, CDC_CFG_MGR.get_reader_queue_length(), working_mode_, *store_service_, *err_handler); - INIT(formatter_, ObLogFormatter, TCONF.formatter_thread_num, DEFAULT_QUEUE_SIZE, working_mode_, + INIT(formatter_, ObLogFormatter, TCONF.formatter_thread_num, CDC_CFG_MGR.get_formatter_queue_length(), working_mode_, &obj2str_helper_, br_pool_, meta_manager_, schema_getter_, storager_, err_handler, skip_dirty_data, enable_hbase_mode, hbase_util_, skip_hbase_mode_put_column_count_not_consistency, enable_output_hidden_primary_key); - INIT(lob_data_merger_, ObCDCLobDataMerger, TCONF.lob_data_merger_thread_num, DEFAULT_QUEUE_SIZE, *err_handler); + INIT(lob_data_merger_, ObCDCLobDataMerger, TCONF.lob_data_merger_thread_num, + CDC_CFG_MGR.get_lob_data_merger_queue_length(), *err_handler); if (OB_SUCC(ret)) { if (OB_FAIL(lob_aux_meta_storager_.init(store_service_))) { @@ -946,15 +950,15 @@ int ObLogInstance::init_components_(const uint64_t start_tstamp_ns) INIT(ddl_processor_, ObLogDDLProcessor, schema_getter_, TCONF.skip_reversed_schema_verison); - INIT(sequencer_, ObLogSequencer, TCONF.sequencer_thread_num, TCONF.sequencer_queue_length, + INIT(sequencer_, ObLogSequencer, TCONF.sequencer_thread_num, CDC_CFG_MGR.get_sequencer_queue_length(), *trans_ctx_mgr_, *trans_stat_mgr_, *committer_, *trans_redo_dispatcher_, *trans_msg_sorter_, *err_handler); INIT(part_trans_parser_, 
ObLogPartTransParser, br_pool_, meta_manager_, cluster_info.cluster_id_); - INIT(dml_parser_, ObLogDmlParser, TCONF.dml_parser_thread_num, DEFAULT_QUEUE_SIZE, *formatter_, + INIT(dml_parser_, ObLogDmlParser, TCONF.dml_parser_thread_num, CDC_CFG_MGR.get_dml_parser_queue_length(), *formatter_, *err_handler, *part_trans_parser_); - INIT(ddl_parser_, ObLogDdlParser, TCONF.ddl_parser_thread_num, DEFAULT_QUEUE_SIZE, *err_handler, + INIT(ddl_parser_, ObLogDdlParser, TCONF.ddl_parser_thread_num, CDC_CFG_MGR.get_auto_queue_length(), *err_handler, *part_trans_parser_); INIT(sys_ls_handler_, ObLogSysLsTaskHandler, ddl_parser_, ddl_processor_, sequencer_, err_handler, @@ -981,6 +985,10 @@ int ObLogInstance::init_components_(const uint64_t start_tstamp_ns) } } + if (OB_SUCC(ret) && OB_FAIL(start_tenant_service_())) { + LOG_ERROR("start_tenant_service_ failed", KR(ret)); + } + LOG_INFO("init all components done", KR(ret), K(start_tstamp_ns), K_(sys_start_schema_version), K(max_cached_trans_ctx_count), K_(is_schema_split_mode), K_(enable_filter_sys_tenant)); @@ -1303,6 +1311,7 @@ void ObLogInstance::destroy_components_() if (is_online_refresh_mode(refresh_mode_)) { DESTROY(schema_getter_, ObLogSchemaGetter); } + DESTROY(meta_manager_, ObLogMetaManager); if (! 
is_online_sql_not_available()) { if (is_tenant_sync_mode()) { mysql_proxy_.destroy(); @@ -1315,7 +1324,6 @@ void ObLogInstance::destroy_components_() } } DESTROY(resource_collector_, ObLogResourceCollector); - DESTROY(meta_manager_, ObLogMetaManager); DESTROY(trans_ctx_mgr_, ObLogTransCtxMgr); DESTROY(trans_stat_mgr_, ObLogTransStatMgr); DESTROY(tenant_mgr_, ObLogTenantMgr); @@ -1323,7 +1331,8 @@ void ObLogInstance::destroy_components_() DESTROY(br_pool_, ObLogBRPool); DESTROY(storager_, ObLogStorager); DESTROY(reader_, ObLogReader); - DESTROY(store_service_, RocksDbStoreService); + // NOTICE: should not stop and destroy store_service in case progress core at rocksdb deconstruct + // DESTROY(store_service_, RocksDbStoreService); if (is_data_dict_refresh_mode(refresh_mode_)) { ObLogMetaDataService::get_instance().destroy(); } @@ -1436,8 +1445,6 @@ int ObLogInstance::launch() LOG_ERROR("start_threads_ fail", KR(ret)); } else if (OB_FAIL(timezone_info_getter_->start())) { LOG_ERROR("start_timezone_info_thread_ fail", KR(ret)); - } else if (OB_FAIL(start_tenant_service_())) { - LOG_ERROR("start_tenant_service_ failed", KR(ret)); } else { is_running_ = true; LOG_INFO("launch all components end success"); @@ -1512,6 +1519,7 @@ int ObLogInstance::get_tenant_ids(std::vector &tenant_ids) void ObLogInstance::mark_stop_flag(const char *stop_reason) { + stop_flag_ = true; if (inited_) { if (OB_ISNULL(stop_reason)) { stop_reason = "UNKNOWN"; @@ -1561,11 +1569,11 @@ int ObLogInstance::next_record(IBinlogRecord **record, IBinlogRecord *pop_record = NULL; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("instance has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("instance has not been initialized", KR(ret)); } else if (OB_ISNULL(record)) { - LOG_ERROR("invalid argument", K(record)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid argument", KR(ret), K(record)); } else if (OB_UNLIKELY(OB_SUCCESS != global_errno_)) { // In case of global error, the corresponding error code is returned, except for OB_TIMEOUT ret = (OB_TIMEOUT == global_errno_) ? OB_IN_STOP_STATE : global_errno_; @@ -1574,8 +1582,8 @@ int ObLogInstance::next_record(IBinlogRecord **record, LOG_ERROR("pop binlog record from br_queue fail", KR(ret)); } } else if (OB_ISNULL(pop_record)) { - LOG_ERROR("pop binlog record from br_queue fail", KR(ret), K(pop_record)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("pop binlog record from br_queue fail", KR(ret), K(pop_record)); } else { *record = pop_record; } @@ -1584,16 +1592,16 @@ int ObLogInstance::next_record(IBinlogRecord **record, ObLogBR *oblog_br = NULL; if (OB_ISNULL(record) || OB_ISNULL(*record)) { - LOG_ERROR("record is invalid", K(record)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("record is invalid", KR(ret), K(record)); } else if (OB_ISNULL(oblog_br = reinterpret_cast((*record)->getUserData()))) { - LOG_ERROR("get user data fail", "br", *record, K(oblog_br)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("get user data fail", KR(ret), "br", *record, K(oblog_br)); } else { int record_type = (*record)->recordType(); - int64_t timestamp_usec = (*record)->getTimestamp() * 1000000 + (*record)->getRecordUsec(); if (HEARTBEAT == record_type) { + int64_t timestamp_usec = (*record)->getTimestamp() * 1000000 + (*record)->getRecordUsec(); last_heartbeat_timestamp_micro_sec_ = std::max(timestamp_usec, last_heartbeat_timestamp_micro_sec_); } @@ -1628,7 +1636,7 @@ int ObLogInstance::next_record(IBinlogRecord **record, } if (OB_SUCC(ret)) { - if (! 
TCONF.enable_verify_mode) { + if (0 == TCONF.enable_verify_mode) { // do nothing } else { if (OB_FAIL(verify_dml_unique_id_(*record))) { @@ -1653,7 +1661,7 @@ int ObLogInstance::verify_ob_trace_id_(IBinlogRecord *br) } else if (OB_ISNULL(br)) { LOG_ERROR("invalid arguments", K(br)); ret = OB_INVALID_ARGUMENT; - } else if (! TCONF.need_verify_ob_trace_id) { + } else if (0 == TCONF.need_verify_ob_trace_id) { // do nothing } else { int record_type = br->recordType(); @@ -2136,6 +2144,11 @@ void ObLogInstance::timer_routine() // Periodic printing of statistical information if (REACH_TIME_INTERVAL(PRINT_INTERVAL)) { + _LOG_INFO("OBCDC RUNNING STATUS: [START_TS %s(%ld)][WORK_MODE: %s][META_REFRESH_MODE:%s][LOG_FETCH_MODE:%s]", + NTS_TO_STR(start_tstamp_ns_), start_tstamp_ns_, + print_working_mode(working_mode_), + print_refresh_mode(refresh_mode_), + print_fetching_mode(fetching_mode_)); print_tenant_memory_usage_(); if (is_online_refresh_mode(refresh_mode_)) { schema_getter_->print_stat_info(); @@ -2260,6 +2273,7 @@ void ObLogInstance::reload_config_() if (OB_FAIL(config.load_from_file(default_config_fpath))) { LOG_ERROR("load_from_file fail", KR(ret), K(default_config_fpath)); } else { + CDC_CFG_MGR.configure(config); const int64_t max_log_file_count = config.max_log_file_count; const bool enable_log_limit = (1 == config.enable_log_limit); LOG_INFO("reset log config", "log_level", config.log_level.str(), K(max_log_file_count)); @@ -2328,15 +2342,31 @@ void ObLogInstance::reload_config_() void ObLogInstance::print_tenant_memory_usage_() { + int ret = OB_SUCCESS; lib::ObMallocAllocator *mallocator = lib::ObMallocAllocator::get_instance(); if (OB_ISNULL(mallocator)) { LOG_ERROR_RET(OB_ERR_UNEXPECTED, "mallocator is NULL, can not print_tenant_memory_usage"); + } else if (OB_ISNULL(tenant_mgr_)) { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "tenant_mgr is NULL, can not print_tenant_memory_usage for each tenant"); } else { - mallocator->print_tenant_memory_usage(OB_SYS_TENANT_ID); - 
mallocator->print_tenant_ctx_memory_usage(OB_SYS_TENANT_ID); + std::vector tenant_ids; + if (enable_filter_sys_tenant_) { + //.print sys tenant memory usage here + mallocator->print_tenant_memory_usage(OB_SYS_TENANT_ID); + mallocator->print_tenant_ctx_memory_usage(OB_SYS_TENANT_ID); + } mallocator->print_tenant_memory_usage(OB_SERVER_TENANT_ID); mallocator->print_tenant_ctx_memory_usage(OB_SERVER_TENANT_ID); + + if (OB_FAIL(tenant_mgr_->get_all_tenant_ids(tenant_ids))) { + LOG_ERROR("get_all_tenant_ids failed", KR(ret)); + } else { + for (auto tenant_id: tenant_ids) { + mallocator->print_tenant_memory_usage(tenant_id); + mallocator->print_tenant_ctx_memory_usage(tenant_id); + } + } } } @@ -2362,19 +2392,21 @@ void ObLogInstance::global_flow_control_() K(formatter_), K(sys_ls_handler_), K(resource_collector_)); } else { int64_t part_trans_task_active_count_upper_bound = - TCONF.part_trans_task_active_count_upper_bound; + CDC_CFG_MGR.get_part_trans_task_active_count_upper_bound(); int64_t part_trans_task_reusable_count_upper_bound = - TCONF.part_trans_task_reusable_count_upper_bound; + CDC_CFG_MGR.get_part_trans_task_reusable_count_upper_bound(); int64_t ready_to_seq_task_upper_bound = - TCONF.ready_to_seq_task_upper_bound; + CDC_CFG_MGR.get_ready_to_seq_task_upper_bound(); int64_t storager_task_count_upper_bound = - TCONF.storager_task_count_upper_bound; + CDC_CFG_MGR.get_storager_task_count_upper_bound(); int64_t storager_mem_percentage = - TCONF.storager_mem_percentage; + CDC_CFG_MGR.get_storager_mem_percentage(); double system_memory_avail_percentage_lower_bound = static_cast(TCONF.system_memory_avail_percentage_lower_bound) / 100; - int64_t memory_limit = TCONF.memory_limit.get(); - int64_t redo_mem_limit = TCONF.redo_dispatcher_memory_limit.get(); + double memory_usage_warn_percent = TCONF.memory_usage_warn_threshold / 100.0; + int64_t memory_limit = CDC_CFG_MGR.get_memory_limit(); + int64_t memory_warn_usage = memory_limit * memory_usage_warn_percent; + 
int64_t redo_mem_limit = CDC_CFG_MGR.get_redo_dispatcher_memory_limit(); int64_t redo_mem_usage = trans_redo_dispatcher_->get_dispatched_memory_size(); int64_t total_part_trans_task_count = trans_task_pool_.get_total_count(); @@ -2382,6 +2414,7 @@ void ObLogInstance::global_flow_control_() int64_t active_log_entry_task_count = log_entry_task_pool_->get_alloc_count(); int64_t reusable_part_trans_task_count = 0; int64_t ready_to_seq_task_count = 0; + int64_t seq_queue_trans_count = 0; int64_t fetcher_part_trans_task_count = fetcher_->get_part_trans_task_count(); int64_t dml_parser_part_trans_task_count = 0; @@ -2401,28 +2434,53 @@ void ObLogInstance::global_flow_control_() int64_t system_memory_avail_lower_bound = static_cast(static_cast(system_memory_limit) * system_memory_avail_percentage_lower_bound); bool need_slow_down_fetcher = false; + const bool need_pause_dispatch = need_pause_redo_dispatch(); + const bool touch_memory_warn_limit = (memory_hold > memory_warn_usage); + const bool is_storage_work_mode = is_storage_working_mode(working_mode_); + const char *reason = ""; - if (OB_FAIL(get_task_count_(ready_to_seq_task_count, reusable_part_trans_task_count))) { - LOG_ERROR("get_task_count fail", KR(ret), K(ready_to_seq_task_count), + if (OB_FAIL(get_task_count_(ready_to_seq_task_count, seq_queue_trans_count, reusable_part_trans_task_count))) { + LOG_ERROR("get_task_count fail", KR(ret), K(ready_to_seq_task_count), K(seq_queue_trans_count), K(reusable_part_trans_task_count)); } else if (OB_FAIL(dml_parser_->get_log_entry_task_count(dml_parser_part_trans_task_count))) { LOG_ERROR("DML parser get_log_entry_task_count fail", KR(ret), K(dml_parser_part_trans_task_count)); } else { + const bool is_seq_queue_not_empty = (seq_queue_trans_count > 0); int64_t storager_task_count = 0; int64_t storager_log_count = 0; storager_->get_task_count(storager_task_count, storager_log_count); // Use the following policy for global traffic control: - // need_slow_down = (active 
partitioned transaction tasks exceed the upper limit || libobcdc takes up more memory than the upper limit || system free memory is less than a certain percentage) - // && (reusable transaction tasks exceeds limit || Parser and Sequencer module cache tasks exceeds limit) + // need_slow_down = + // (1) (active partitioned transaction tasks exceed the upper limit || liboblog takes up more memory than the upper limit || system free memory is less than a certain percentage) + // && (reusable transaction tasks exceeds limit || Parser and Sequencer module cache tasks exceeds limit || if in storage working mode) + // OR + // (2) storager task overload with certain threshold + // OR + // (3) memory is limited and exist trans sequenced but not output + // OR + // (4) memory_limit touch warn threshold and need_pause_dispatch bool condition1 = (active_part_trans_task_count >= part_trans_task_active_count_upper_bound) - || (memory_hold >= memory_limit) + || touch_memory_warn_limit || (system_memory_avail < system_memory_avail_lower_bound); bool condition2 = (reusable_part_trans_task_count >= part_trans_task_reusable_count_upper_bound) || (ready_to_seq_task_count > ready_to_seq_task_upper_bound); bool condition3 = (storager_task_count > storager_task_count_upper_bound) && (memory_hold >= storager_mem_percentage * memory_limit); - need_slow_down_fetcher = (condition1 && condition2) || (condition3); + need_slow_down_fetcher = (condition1 && (condition2 || need_pause_dispatch || is_seq_queue_not_empty)) || condition3; + if (need_slow_down_fetcher) { + if (condition2) { + reason = "MEMORY_LIMIT_AND_REUSABLE_PART_TOO_MUCH"; + } else if (need_pause_dispatch) { + reason = "MEMORY_LIMIT_AND_DISPATCH_PAUSED"; + } else if (is_seq_queue_not_empty) { + reason = "MEMORY_LIMIT_AND_EXIST_TRANS_TO_OUTPUT"; + } else if (condition3) { + reason = "STORAGER_TASK_OVER_THRESHOLD"; + } else { + reason = "MEMORY_LIMIT"; + } + } // Get the number of active distributed transactions after sequencing, 
including sequenced, formatted, and committed int64_t seq_trans_count = @@ -2440,13 +2498,13 @@ void ObLogInstance::global_flow_control_() "PAUSED=%d MEM=%s/%s " "AVAIL_MEM=%s/%s " "READY_TO_SEQ=%ld/%ld " - "PART_TRANS(TOTAL=%ld,ACTIVE=%ld/%ld,REUSABLE=%ld/%ld) " + "PART_TRANS(TOTAL=%ld, ACTIVE=%ld/%ld, REUSABLE=%ld/%ld) " "LOG_TASK(ACTIVE=%ld) " "STORE(%ld/%ld) " "[FETCHER=%ld DML_PARSER=%ld " "COMMITER=%ld USER_QUEUE=%ld OUT=%ld RC=%ld] " - "DIST_TRANS(SEQ=%ld,COMMITTED=%ld) " - "REDO_DISPATCH=%s/%s", + "DIST_TRANS(SEQ_QUEUE=%ld, SEQ=%ld, COMMITTED=%ld) " + "NEED_PAUSE_DISPATCH=%d REASON=%s", need_slow_down_fetcher, current_fetcher_is_paused, SIZE_TO_STR(memory_hold), SIZE_TO_STR(memory_limit), SIZE_TO_STR(system_memory_avail), SIZE_TO_STR(system_memory_avail_lower_bound), @@ -2460,8 +2518,8 @@ void ObLogInstance::global_flow_control_() committer_ddl_part_trans_task_count + committer_dml_part_trans_task_count, br_queue_part_trans_task_count, out_part_trans_task_count, resource_collector_part_trans_task_count, - seq_trans_count, committed_trans_count, - SIZE_TO_STR(redo_mem_usage), SIZE_TO_STR(redo_mem_limit)); + seq_queue_trans_count, seq_trans_count, committed_trans_count, + need_pause_dispatch, reason); } } @@ -2521,7 +2579,7 @@ void ObLogInstance::dump_pending_trans_info_() int64_t total_size = pos; char *ptr = buffer; - while (OB_SUCC(ret) && total_size > 0) { + while (OB_SUCC(ret) && total_size > 0 && ! 
stop_flag_) { ssize_t nwrite = 0; nwrite = write(fd, ptr, static_cast(total_size)); @@ -2600,26 +2658,29 @@ void ObLogInstance::clean_log_() } } -int64_t ObLogInstance::get_memory_hold_() +int64_t ObLogInstance::get_memory_hold_() const { return lib::get_memory_used(); } -int64_t ObLogInstance::get_memory_avail_() +int64_t ObLogInstance::get_memory_avail_() const { return lib::get_memory_avail(); } -int64_t ObLogInstance::get_memory_limit_() +int64_t ObLogInstance::get_memory_limit_() const { return lib::get_memory_limit(); } -int ObLogInstance::get_task_count_(int64_t &ready_to_seq_task_count, +int ObLogInstance::get_task_count_( + int64_t &ready_to_seq_task_count, + int64_t &seq_trans_count, int64_t &part_trans_task_resuable_count) { int ret = OB_SUCCESS; ready_to_seq_task_count = 0; + seq_trans_count = 0; part_trans_task_resuable_count = 0; if (OB_ISNULL(fetcher_) || OB_ISNULL(dml_parser_) || OB_ISNULL(formatter_) @@ -2655,6 +2716,7 @@ int ObLogInstance::get_task_count_(int64_t &ready_to_seq_task_count, // Count the number of partitioned tasks to be ordered ready_to_seq_task_count = dml_parser_log_count + formatter_log_count + storager_log_count; + seq_trans_count = seq_stat_info.sequenced_trans_count_; } // II. 
Get the number of reusable tasks for each module @@ -2696,9 +2758,10 @@ int ObLogInstance::get_task_count_(int64_t &ready_to_seq_task_count, _LOG_INFO("[TASK_COUNT_STAT] [FETCHER] [PART_TRANS_TASK=%ld]", fetcher_part_trans_task_count); _LOG_INFO("[TASK_COUNT_STAT] [SYS_LS_HANDLE] [PART_TRANS_TASK=%ld]", sys_ls_handle_part_trans_task_count); _LOG_INFO("[TASK_COUNT_STAT] [STORAGER] [LOG_TASK=%ld/%ld]", storager_task_count, storager_log_count); - _LOG_INFO("[TASK_COUNT_STAT] [SEQUENCER] [PART_TRANS_TASK(QUEUE=%ld TOTAL=[%ld][DDL=%ld DML=%ld HB=%ld])] [SEQ_TRANS=%ld]", - seq_stat_info.queue_part_trans_task_count_, seq_stat_info.total_part_trans_task_count_, seq_stat_info.ddl_part_trans_task_count_, - seq_stat_info.dml_part_trans_task_count_, seq_stat_info.hb_part_trans_task_count_, seq_stat_info.sequenced_trans_count_); + _LOG_INFO("[TASK_COUNT_STAT] [SEQUENCER] [PART_TRANS_TASK(QUEUE=%ld TOTAL=[%ld][DDL=%ld DML=%ld HB=%ld])] [TRANS(READY=%ld SEQ=%ld)]", + seq_stat_info.queue_part_trans_task_count_, seq_stat_info.total_part_trans_task_count_, + seq_stat_info.ddl_part_trans_task_count_, seq_stat_info.dml_part_trans_task_count_, seq_stat_info.hb_part_trans_task_count_, + seq_stat_info.ready_trans_count_, seq_stat_info.sequenced_trans_count_); _LOG_INFO("[TASK_COUNT_STAT] [READER] [ROW_TASK=%ld]", reader_task_count); _LOG_INFO("[TASK_COUNT_STAT] [DML_PARSER] [LOG_TASK=%ld]", dml_parser_log_count); _LOG_INFO("[TASK_COUNT_STAT] [FORMATTER] [BR=%ld LOG_TASK=%ld LOB_STMT=%ld]", formatter_br_count, formatter_log_count, stmt_in_lob_merger_count); @@ -3047,6 +3110,81 @@ int ObLogInstance::get_tenant_compat_mode(const uint64_t tenant_id, return ret; } +// pause disaptch if: +// 0. user force pause. +// 1. user queue backlog. +// 2. resource collector backlog. 
+bool ObLogInstance::need_pause_redo_dispatch() const +{ + bool current_need_pause = true; + static bool last_need_paused = false; + if (inited_) { + double memory_usage_warn_percent = TCONF.memory_usage_warn_threshold / 100.0; + int64_t memory_limit = CDC_CFG_MGR.get_memory_limit(); + int64_t memory_warn_usage = memory_limit * memory_usage_warn_percent; + int64_t memory_hold = get_memory_hold_(); + int64_t redo_dispatch_exceed_ratio = CDC_CFG_MGR.get_redo_dispatched_memory_limit_exceed_ratio(); + const int64_t redo_memory_limit = CDC_CFG_MGR.get_redo_dispatcher_memory_limit(); + const int64_t rc_br_thread_count = TCONF.resource_collector_thread_num_for_br; + const int64_t rc_thread_queue_len = CDC_CFG_MGR.get_resource_collector_queue_length(); + int64_t resource_collector_part_trans_task_count = 0; + int64_t resource_collector_br_count = 0; + resource_collector_->get_task_count(resource_collector_part_trans_task_count, resource_collector_br_count); + const int64_t user_queue_br_count = br_queue_.get_dml_br_count() + br_queue_.get_ddl_br_count(); + const bool force_pause_dispatch = (0 != TCONF.pause_dispatch_redo); + const int64_t pause_dispatch_threshold = TCONF.pause_redo_dispatch_task_count_threshold; + const bool touch_memory_warn_limit = (memory_hold > memory_warn_usage); + const bool touch_memory_limit = (memory_hold > memory_limit); + double pause_dispatch_percent = pause_dispatch_threshold / 100.0; + if (touch_memory_limit) { + pause_dispatch_percent = 0; + // pause redo dispatch + } else if (touch_memory_warn_limit) { + pause_dispatch_percent = pause_dispatch_percent * 0.1; + // if already touch memory_warn limit, increase probability of redo dispatch flow control + } + int64_t dispatched_redo_memory = 0; + if (OB_NOT_NULL(trans_redo_dispatcher_)) { + dispatched_redo_memory = trans_redo_dispatcher_->get_dispatched_memory_size(); + } + const bool is_redo_dispatch_over_exceed = (dispatched_redo_memory >= redo_dispatch_exceed_ratio * redo_memory_limit); + 
const char *reason = ""; + if (force_pause_dispatch) { + current_need_pause = true; + reason = "USER_FORCE_PAUSE"; + } else if (resource_collector_br_count > (rc_br_thread_count * rc_thread_queue_len * pause_dispatch_percent)) { + current_need_pause = (is_redo_dispatch_over_exceed || touch_memory_warn_limit); + reason = "SLOW_RESOURCE_RECYCLING"; + } else if (user_queue_br_count > (CDC_CFG_MGR.get_br_queue_length() * pause_dispatch_percent)) { + current_need_pause = (is_redo_dispatch_over_exceed || touch_memory_warn_limit); + reason = "SLOW_CONSUMPTION_DOWNSTREAM"; + } else { + current_need_pause = false; + } + bool is_state_change = (last_need_paused != current_need_pause); + bool need_print_state = (is_state_change || current_need_pause) && REACH_TIME_INTERVAL(PRINT_GLOBAL_FLOW_CONTROL_INTERVAL); + if (need_print_state) { + _LOG_INFO("[NEED_PAUSE_REDO_DISPATCH=%d]" + "[REASON:%s]" + "[REDO_DISPATCH:%s/%s]" + "[THRESHOLD:%.2f]" + "[QUEUE_DML_BR:%ld]" + "[RESOURCE_COLLECTOR:%ld]" + "[STATE_CHANGED:%d]", + current_need_pause, + reason, + SIZE_TO_STR(dispatched_redo_memory), SIZE_TO_STR(redo_memory_limit), + pause_dispatch_percent, + user_queue_br_count, + resource_collector_br_count, + is_state_change); + } + if (is_state_change) { + last_need_paused = current_need_pause; + } + } + return current_need_pause; +} } } diff --git a/src/logservice/libobcdc/src/ob_log_instance.h b/src/logservice/libobcdc/src/ob_log_instance.h index 978137c0ac..fa240c1fe6 100644 --- a/src/logservice/libobcdc/src/ob_log_instance.h +++ b/src/logservice/libobcdc/src/ob_log_instance.h @@ -155,7 +155,7 @@ public: int32_t get_log_level() const; const char *get_log_file() const; void set_disable_redirect_log(const bool flag) { disable_redirect_log_ = flag; } - bool is_running() { return is_running_; } + OB_INLINE bool is_running() const { return is_running_; } static void print_version(); int set_assign_log_dir(const char *log_dir, const int64_t log_dir_len); @@ -183,6 +183,8 @@ public: int64_t 
&start_schema_version); // 3. set start global trans version int set_start_global_trans_version(const int64_t start_global_trans_version); + // check need pause when consume or resource collector backlog + bool need_pause_redo_dispatch() const; // online sql not available only when using data_dict and fetch_log directly from backup. OB_INLINE bool is_online_sql_not_available() const @@ -229,13 +231,15 @@ private: // 3. init sql_proxy int init_sql_provider_(); // Get the total amount of memory occupied by libobcdc - int64_t get_memory_hold_(); + int64_t get_memory_hold_() const; // Get system free memory - int64_t get_memory_avail_(); + int64_t get_memory_avail_() const; // Get system memory limit - int64_t get_memory_limit_(); + int64_t get_memory_limit_() const; // Get the number of tasks to be processed - int get_task_count_(int64_t &ready_to_seq_task_count, + int get_task_count_( + int64_t &ready_to_seq_task_count, + int64_t &seq_trans_count, int64_t &part_trans_task_resuable_count); // next record diff --git a/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.cpp b/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.cpp index 75c0a49966..0ad342df82 100644 --- a/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.cpp +++ b/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.cpp @@ -163,7 +163,9 @@ int FetchStream::prepare_to_fetch_logs( // For the fetch log stream task, it should be immediately assigned to a worker thread for processing if (OB_FAIL(stream_worker_->dispatch_stream_task(*this, "DispatchServer"))) { - LOG_ERROR("dispatch stream task fail", KR(ret)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch stream task fail", KR(ret)); + } } else { // Note: You cannot continue to manipulate this data structure afterwards ! 
} @@ -235,7 +237,9 @@ void FetchStream::process_timer_task() LOG_ERROR("invalid stream worker", K(stream_worker_)); ret = OB_INVALID_ERROR; } else if (OB_FAIL(stream_worker_->dispatch_stream_task(*this, "TimerWakeUp"))) { - LOG_ERROR("dispatch stream task fail", KR(ret), K(this)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("dispatch stream task fail", KR(ret), K(this)); + } } else { ATOMIC_STORE(&end_time, get_timestamp()); max_dispatch_time = std::max(max_dispatch_time, ATOMIC_LOAD(&end_time) - start_time); @@ -423,9 +427,11 @@ int FetchStream::dispatch_fetch_task_(LSFetchCtx &task, ls_fetch_ctx_->set_not_in_fetching_log(); if (OB_FAIL(stream_worker_->dispatch_fetch_task(task, dispatch_reason_str))) { - // Assignment of fetch log tasks - LOG_ERROR("dispatch fetch task fail", KR(ret), K(task), - "dispatch_reason", dispatch_reason_str); + if (OB_IN_STOP_STATE != ret) { + // Assignment of fetch log tasks + LOG_ERROR("dispatch fetch task fail", KR(ret), K(task), + "dispatch_reason", dispatch_reason_str); + } } else { // You cannot continue with the task afterwards } @@ -2075,7 +2081,9 @@ int FetchStream::update_fetch_task_state_(KickOutInfo &kick_out_info, if (OB_SUCCESS == ret && task->need_update_svr_list()) { bool need_print_info = (TCONF.print_ls_server_list_update_info != 0); if (OB_FAIL(task->update_svr_list(need_print_info))) { - LOG_ERROR("update svr list fail", KR(ret), KPC(task)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("update svr list fail", KR(ret), KPC(task)); + } } } diff --git a/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.h b/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.h index cd5181c3a6..5be68fbf88 100644 --- a/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.h +++ b/src/logservice/libobcdc/src/ob_log_ls_fetch_stream.h @@ -123,6 +123,9 @@ public: int64_t get_fetch_task_count() const { return 1; } + // is rpc response ready + bool is_rpc_ready() const { return fetch_log_arpc_.is_rpc_ready(); } + public: static void 
configure(const ObLogConfig & config); diff --git a/src/logservice/libobcdc/src/ob_log_meta_data_fetcher.cpp b/src/logservice/libobcdc/src/ob_log_meta_data_fetcher.cpp index 2e1dc521c6..024c1ab54a 100644 --- a/src/logservice/libobcdc/src/ob_log_meta_data_fetcher.cpp +++ b/src/logservice/libobcdc/src/ob_log_meta_data_fetcher.cpp @@ -85,7 +85,6 @@ int ObLogMetaDataFetcher::init( } else if (OB_FAIL(trans_task_pool_.init( &trans_task_pool_alloc_, PART_TRANS_TASK_PREALLOC_COUNT, - cfg.part_trans_task_page_size, true/*allow_dynamic_alloc*/, PART_TRANS_TASK_PREALLOC_PAGE_COUNT))) { LOG_ERROR("trans_task_pool_ init failed", KR(ret)); diff --git a/src/logservice/libobcdc/src/ob_log_mysql_connector.h b/src/logservice/libobcdc/src/ob_log_mysql_connector.h index 1b03a7d888..560f83a89f 100644 --- a/src/logservice/libobcdc/src/ob_log_mysql_connector.h +++ b/src/logservice/libobcdc/src/ob_log_mysql_connector.h @@ -114,7 +114,6 @@ struct MySQLConnConfig TO_STRING_KV(K_(svr), K_(mysql_user), - K_(mysql_password), K_(mysql_db), K_(mysql_connect_timeout_sec), K_(mysql_query_timeout_sec)); diff --git a/src/logservice/libobcdc/src/ob_log_mysql_proxy.cpp b/src/logservice/libobcdc/src/ob_log_mysql_proxy.cpp index 87e56b8a1d..0c2e887b12 100644 --- a/src/logservice/libobcdc/src/ob_log_mysql_proxy.cpp +++ b/src/logservice/libobcdc/src/ob_log_mysql_proxy.cpp @@ -145,7 +145,9 @@ void ObLogMysqlProxy::destroy() inited_ = false; connection_pool_.stop(); - TG_DESTROY(tg_id_); + // NOTICE: should not stop and wait timer task cause timer task will cancel in connection_pool_ + // deconstruct, stop and wait will invoke by cancel in timer task + // TG_DESTROY(tg_id_); cluster_user_[0] = '\0'; cluster_password_[0] = '\0'; diff --git a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp index 7707b0d9a4..7707c89193 100644 --- a/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp +++ b/src/logservice/libobcdc/src/ob_log_part_trans_task.cpp 
@@ -2054,7 +2054,7 @@ int ObLogEntryTask::link_row_list(int64_t &row_ref_cnt) if (OB_SUCC(ret)) { // Note: First set ref count before set formatted status, to avoid Sortter has get Dml Stmt set_row_ref_cnt(redo_node_->get_valid_row_num()); - row_ref_cnt = row_ref_cnt_; + row_ref_cnt = get_row_ref_cnt(); if (OB_FAIL(set_redo_log_formatted())) { LOG_ERROR("set_redo_log_formatted fail", KR(ret)); diff --git a/src/logservice/libobcdc/src/ob_log_reader.cpp b/src/logservice/libobcdc/src/ob_log_reader.cpp index 2814e5b75d..a30e9eecf0 100644 --- a/src/logservice/libobcdc/src/ob_log_reader.cpp +++ b/src/logservice/libobcdc/src/ob_log_reader.cpp @@ -57,13 +57,13 @@ int ObLogReader::init(const int64_t thread_num, int ret = OB_SUCCESS; if (OB_UNLIKELY(inited_)) { - LOG_ERROR("ObLogReader has been initialized"); ret = OB_INIT_TWICE; + LOG_ERROR("ObLogReader has been initialized", KR(ret)); } else if (OB_UNLIKELY(thread_num <= 0) || OB_UNLIKELY(queue_size <= 0) || OB_UNLIKELY(! is_working_mode_valid(working_mode))) { - LOG_ERROR("invalid arguments", K(thread_num), K(queue_size), K(working_mode)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid arguments", KR(ret), K(thread_num), K(queue_size), K(working_mode)); } else if (OB_FAIL(ReaderThread::init(thread_num, queue_size))) { LOG_ERROR("init ReaderThread queue thread fail", K(ret), K(thread_num), K(queue_size)); } else { @@ -107,8 +107,8 @@ int ObLogReader::start() int ret = OB_SUCCESS; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogReader has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogReader has not been initialized", KR(ret)); } else if (OB_FAIL(ReaderThread::start())) { LOG_ERROR("start ReaderThread thread fail", K(ret), "thread_num", get_thread_num()); } else { diff --git a/src/logservice/libobcdc/src/ob_log_resource_collector.cpp b/src/logservice/libobcdc/src/ob_log_resource_collector.cpp index 2dfbdf4101..0df9619a53 100644 --- a/src/logservice/libobcdc/src/ob_log_resource_collector.cpp +++ b/src/logservice/libobcdc/src/ob_log_resource_collector.cpp @@ -392,6 +392,8 @@ int ObLogResourceCollector::revert_participants_(const int64_t thread_index, int ObLogResourceCollector::push_task_into_queue_(ObLogResourceRecycleTask &task) { int ret = OB_SUCCESS; + const static int64_t PUSH_TASK_TIMEOUT_WAIT_TIME = 1 * _MSEC_; + const static int64_t PUSH_TASK_TIMEOUT_PRINT_INTERVAL = 10 * _SEC_; static uint64_t part_trans_task_push_seq = 0; static uint64_t br_push_seq = 0; uint64_t hash_value = 0; @@ -419,11 +421,15 @@ int ObLogResourceCollector::push_task_into_queue_(ObLogResourceRecycleTask &task while (OB_SUCC(ret) && ! 
RCThread::is_stoped()) { ret = RCThread::push(&task, hash_value, DATA_OP_TIMEOUT); - if (OB_TIMEOUT != ret) { - break; - } else { - // When timeout, need to retry + // retry if OB_TIMEOUT and break for other ret code + if (OB_UNLIKELY(OB_TIMEOUT == ret)) { + if (TC_REACH_TIME_INTERVAL(PUSH_TASK_TIMEOUT_PRINT_INTERVAL)) { + LOG_INFO("push task into RC Thread timeout, retrying", KR(ret)); + } + usleep(PUSH_TASK_TIMEOUT_WAIT_TIME); ret = OB_SUCCESS; + } else { + break; } } // Note: After a task is pushed to the queue, it may be recycled quickly and the task cannot be accessed later diff --git a/src/logservice/libobcdc/src/ob_log_resource_recycle_task.h b/src/logservice/libobcdc/src/ob_log_resource_recycle_task.h index e1f776f413..f1c0fdf354 100644 --- a/src/logservice/libobcdc/src/ob_log_resource_recycle_task.h +++ b/src/logservice/libobcdc/src/ob_log_resource_recycle_task.h @@ -29,11 +29,11 @@ public: BINLOG_RECORD_TASK = 2, LOB_DATA_CLEAN_TASK = 3, }; - bool is_unknown_task() const { return UNKNOWN_TASK == task_type_; } - bool is_part_trans_task() const { return PART_TRANS_TASK == task_type_; } - bool is_binlog_record_task() const { return BINLOG_RECORD_TASK == task_type_; } - bool is_lob_data_clean_task() const { return LOB_DATA_CLEAN_TASK == task_type_; } - TaskType get_task_type() const { return task_type_; } + OB_INLINE bool is_unknown_task() const { return UNKNOWN_TASK == task_type_; } + OB_INLINE bool is_part_trans_task() const { return PART_TRANS_TASK == task_type_; } + OB_INLINE bool is_binlog_record_task() const { return BINLOG_RECORD_TASK == task_type_; } + OB_INLINE bool is_lob_data_clean_task() const { return LOB_DATA_CLEAN_TASK == task_type_; } + OB_INLINE TaskType get_task_type() const { return task_type_; } static const char *print_task_type(TaskType task) { diff --git a/src/logservice/libobcdc/src/ob_log_rocksdb_store_service.cpp b/src/logservice/libobcdc/src/ob_log_rocksdb_store_service.cpp index 9d7ac905b1..c04f5ade7a 100644 --- 
a/src/logservice/libobcdc/src/ob_log_rocksdb_store_service.cpp +++ b/src/logservice/libobcdc/src/ob_log_rocksdb_store_service.cpp @@ -90,13 +90,13 @@ int RocksDbStoreService::close() if (NULL != m_db_) { LOG_INFO("closing rocksdb ..."); mark_stop_flag(); - usleep(100 * _MSEC_); + usleep(5 * _SEC_); rocksdb::Status status = m_db_->Close(); if (! status.ok()) { - _LOG_ERROR("rocksdb close failed, error %s", status.ToString().c_str()); ret = OB_ERR_UNEXPECTED; + _LOG_ERROR("rocksdb close failed, error %s", status.ToString().c_str()); } else { LOG_INFO("rocksdb close succ"); } @@ -369,6 +369,10 @@ int RocksDbStoreService::create_column_family(const std::string& column_family_n cf_options.max_write_buffer_number = 9; // Column Family's default memtable size is 64M, when the maximum limit is exceeded, memtable -> immutable memtable, increase write_buffer_size, can reduce write amplification cf_options.write_buffer_size = rocksdb_write_buffer_size << 20; + // config rocksdb compression + // supported compress algorithms will print in LOG file + // cf_options.compression = rocksdb::CompressionType::kLZ4Compression; + // cf_options.bottommost_compression = rocksdb::CompressionType::kZSTD; if (is_stopped()) { ret = OB_IN_STOP_STATE; diff --git a/src/logservice/libobcdc/src/ob_log_schema_getter.cpp b/src/logservice/libobcdc/src/ob_log_schema_getter.cpp index 607da637e3..1ce10b29dc 100644 --- a/src/logservice/libobcdc/src/ob_log_schema_getter.cpp +++ b/src/logservice/libobcdc/src/ob_log_schema_getter.cpp @@ -485,7 +485,11 @@ int ObLogSchemaGetter::init(common::ObMySQLProxy &mysql_proxy, void ObLogSchemaGetter::destroy() { + LOG_INFO("ObCDCSchemaGetter destroy begin"); inited_ = false; + schema_service_.stop(); + schema_service_.wait(); + LOG_INFO("ObCDCSchemaGetter destroy succ"); } int ObLogSchemaGetter::get_lazy_schema_guard(const uint64_t tenant_id, diff --git a/src/logservice/libobcdc/src/ob_log_sequencer1.cpp b/src/logservice/libobcdc/src/ob_log_sequencer1.cpp index 
1527cb0e07..d688169796 100644 --- a/src/logservice/libobcdc/src/ob_log_sequencer1.cpp +++ b/src/logservice/libobcdc/src/ob_log_sequencer1.cpp @@ -73,8 +73,12 @@ ObLogSequencer::ObLogSequencer() last_global_checkpoint_(OB_INVALID_TIMESTAMP), global_seq_(0), br_committer_queue_seq_(0), - trans_queue_(), + ready_trans_queue_(), trans_queue_lock_(), + seq_trans_queue_(), + checkpoint_cond_(), + ready_queue_cond_(), + seq_queue_cond_(), total_part_trans_task_count_(0), ddl_part_trans_task_count_(0), dml_part_trans_task_count_(0), @@ -108,6 +112,7 @@ int ObLogSequencer::init( IObLogErrHandler &err_handler) { int ret = OB_SUCCESS; + static const int64_t seq_thread_num = 2; if (OB_UNLIKELY(inited_)) { LOG_ERROR("ObLogSequencer has been initialized"); @@ -118,6 +123,10 @@ int ObLogSequencer::init( ret = OB_INVALID_ARGUMENT; } else if (OB_FAIL(SequencerThread::init(thread_num, queue_size))) { LOG_ERROR("init sequencer queue thread fail", KR(ret), K(thread_num), K(queue_size)); + } else if (OB_FAIL(lib::ThreadPool::set_thread_count(seq_thread_num))) { + LOG_ERROR("set sequence thread num failed", KR(ret), K(seq_thread_num)); + } else if (OB_FAIL(seq_trans_queue_.init(queue_size))) { + LOG_ERROR("init sequenced_trans_queue failed", KR(ret), K(queue_size)); } else if (OB_FAIL(schema_inc_replay_.init(false/*is_start_progress*/))) { LOG_ERROR("schema_inc_replay_ init failed", KR(ret)); } else { @@ -193,7 +202,7 @@ int ObLogSequencer::start() void ObLogSequencer::stop() { if (inited_) { - lib::ThreadPool::stop(); + mark_stop_flag(); SequencerThread::stop(); LOG_INFO("stop threads succ", "thread_num", get_thread_num()); } @@ -243,82 +252,153 @@ void ObLogSequencer::get_task_count(SeqStatInfo &stat_info) stat_info.dml_part_trans_task_count_ = ATOMIC_LOAD(&dml_part_trans_task_count_); stat_info.hb_part_trans_task_count_ = ATOMIC_LOAD(&hb_part_trans_task_count_); stat_info.queue_part_trans_task_count_ = ATOMIC_LOAD(&queue_part_trans_task_count_); - stat_info.sequenced_trans_count_ = 
trans_queue_.size(); + stat_info.ready_trans_count_ = ready_trans_queue_.size(); + stat_info.sequenced_trans_count_ = seq_trans_queue_.get_curr_total(); } // A thread is responsible for continually rotating the sequence of transactions that need sequence void ObLogSequencer::run1() { - ObLogTraceIdGuard trace_guard; - const int64_t SLEEP_US = 1000; - lib::set_thread_name("ObLogSequencerTrans"); int ret = OB_SUCCESS; - bool enable_monitor = false; - ObLogTimeMonitor monitor("Sequencer-deal-trans", enable_monitor); + const int64_t thread_idx = lib::ThreadPool::get_thread_idx(); + const int64_t thread_count = lib::ThreadPool::get_thread_count(); - while (OB_SUCC(ret) && ! lib::ThreadPool::has_set_stop()) { - // Global checkpoint not updated or initial value, do nothing - if (ATOMIC_LOAD(&global_checkpoint_) == ATOMIC_LOAD(&last_global_checkpoint_)) { - ob_usleep(SLEEP_US); - } else { - ObByteLockGuard guard(trans_queue_lock_); - - while (OB_SUCC(ret) && ! trans_queue_.empty() && ! lib::ThreadPool::has_set_stop()) { - ObLogTraceIdGuard trace_guard; - TrxSortElem top_trx_sort_elem = trans_queue_.top(); - const int64_t trans_commit_version = top_trx_sort_elem.get_trans_commit_version(); - monitor.mark_and_get_cost("begin", true); - - if (trans_commit_version <= ATOMIC_LOAD(&global_checkpoint_)) { - if (OB_FAIL(handle_to_be_sequenced_trans_(top_trx_sort_elem, lib::ThreadPool::has_set_stop()))) { - if (OB_IN_STOP_STATE != ret) { - LOG_ERROR("handle_to_be_sequenced_trans_ fail", KR(ret), K(top_trx_sort_elem)); - } - } else { - monitor.mark_and_get_cost("end", true); - trans_queue_.pop(); - } - } else { - break; - } - } // empty + if (thread_count > 2) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("only expected two thread to handle ready trans", + KR(ret), K(thread_count), K(thread_idx)); + } else if (0 == thread_idx) { + lib::set_thread_name("CDC-READY-TX-HANDLER"); + if (OB_FAIL(push_ready_trans_to_seq_queue_())) { + if (OB_IN_STOP_STATE != ret) { + 
LOG_ERROR("push_ready_trans_to_seq_queue_ failed", KR(ret)); + } } - ob_usleep(SLEEP_US); - - if (REACH_TIME_INTERVAL(PRINT_SEQ_INFO_INTERVAL)) { - ISTAT("[OUTPUT]", K(global_checkpoint_), K(last_global_checkpoint_), "checkpoint_delay", NTS_TO_DELAY(global_checkpoint_), - K(global_seq_), "size", trans_queue_.size()); + } else if (1 == thread_idx) { + lib::set_thread_name("CDC-SEQ-TX-HANDLER"); + if (OB_FAIL(handle_trans_in_seq_queue_())) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_trans_in_seq_queue_ failed", KR(ret)); + } } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unexpect sequencer thread", KR(ret), K(thread_count), K(thread_idx)); } - if (OB_SUCC(ret) && lib::ThreadPool::has_set_stop()) { ret = OB_IN_STOP_STATE; } // exit on fail if (OB_SUCCESS != ret && OB_IN_STOP_STATE != ret && NULL != err_handler_) { - err_handler_->handle_error(ret, "sequencer thread exits, err=%d", ret); - ObLogSequencer::stop(); + err_handler_->handle_error(ret, "sequencer thread(idx=%ld) exits, err=%d", get_thread_idx(), ret); + mark_stop_flag(); } } -int ObLogSequencer::handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, +int ObLogSequencer::push_ready_trans_to_seq_queue_() +{ + int ret = OB_SUCCESS; + ObLogTraceIdGuard trace_guard; + + while (OB_SUCC(ret) && ! lib::ThreadPool::has_set_stop()) { + bool is_trans_can_be_output = true; + if (! 
ready_trans_queue_.empty()) { + // notice the lock scope + ObByteLockGuard guard(trans_queue_lock_); + TrxSortElem top_trx_sort_elem = ready_trans_queue_.top(); + const int64_t global_trans_version = top_trx_sort_elem.get_trans_commit_version(); + TransCtx *trans_ctx = top_trx_sort_elem.get_trans_ctx_host(); + is_trans_can_be_output = (global_trans_version <= ATOMIC_LOAD(&global_checkpoint_)); + if (is_trans_can_be_output) { + if (OB_FAIL(seq_trans_queue_.push(trans_ctx))) { + if (OB_SIZE_OVERFLOW != ret) { + LOG_ERROR("push trans_ctx into seq_trans_queue failed", KR(ret)); + // push failed, signal consumer wakeup + seq_queue_cond_.signal(); + } else { + // seq_trans_queue is full, will retry in next round + ret = OB_SUCCESS; + // wait seq_trans_queue not full + seq_queue_cond_.timedwait(DATA_OP_TIMEOUT); + } + } else { + ready_trans_queue_.pop(); + // push success, signal handle_trans_in_seq_queue_ to consume + seq_queue_cond_.signal(); + } + } + } else { + // wait trans assembled and push into ready_trans_queue + ready_queue_cond_.timedwait(DATA_OP_TIMEOUT); + } + if (!is_trans_can_be_output) { + // wait checkpoint advance + // can't put into ObByteLockGuard(trans_queue_lock_) in case of deak lock of trans_queue_lock_ + checkpoint_cond_.timedwait(DATA_OP_TIMEOUT); + } + if (REACH_TIME_INTERVAL(PRINT_SEQ_INFO_INTERVAL)) { + ISTAT("[OUTPUT]", "DELAY", NTS_TO_DELAY(global_checkpoint_), + K_(global_checkpoint), K_(last_global_checkpoint), K_(global_seq), + "ready_trans_count", ready_trans_queue_.size(), + "sequenced_trans_count", seq_trans_queue_.get_curr_total()); + } + } // end while + + return ret; +} + +int ObLogSequencer::handle_trans_in_seq_queue_() +{ + int ret = OB_SUCCESS; + ObLogTraceIdGuard trace_guard; + + while (OB_SUCC(ret) && ! 
lib::ThreadPool::has_set_stop()) { + bool seq_queue_is_empty = false; + TransCtx *trans_ctx = nullptr; + ObLogTraceIdGuard trace_guard; + if (OB_FAIL(seq_trans_queue_.pop(trans_ctx))) { + if (OB_ENTRY_NOT_EXIST == ret) { + seq_queue_is_empty = true; + ret = OB_SUCCESS; + } else { + LOG_ERROR("pop TransCtx from seq_trans_queue failed", KR(ret)); + } + } else if (OB_FAIL(handle_sequenced_trans_(trans_ctx, lib::ThreadPool::has_set_stop()))) { + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("handle_sequenced_trans_ failed", KR(ret)); + } + } + if (seq_queue_is_empty) { + // wait data push into seq_trans_queue + seq_queue_cond_.timedwait(DATA_OP_TIMEOUT); + } else { + // consume data in seq_trans_queue, signal producer to push data + // or consume failed, signal producer wakeup + seq_queue_cond_.signal(); + } + } // end while + + return ret; +} + +int ObLogSequencer::handle_sequenced_trans_( + TransCtx *trans_ctx, volatile bool &stop_flag) { int ret = OB_SUCCESS; bool enable_monitor = false; ObLogTimeMonitor monitor("Sequencer::handle_tobe_sequenced_trans", enable_monitor); - TransCtx *trans_ctx = trx_sort_elem.get_trans_ctx_host(); - const int64_t new_seq = ATOMIC_FAA(&global_seq_, 1); - int64_t new_schema_version = 0; if (OB_UNLIKELY(! 
inited_)) { ret = OB_NOT_INIT; LOG_ERROR("ObLogSequencer has not been initialized", KR(ret)); } else if (OB_ISNULL(trans_ctx)) { - LOG_ERROR("trans_ctx is NULL", K(trx_sort_elem)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("trans_ctx is NULL", KR(ret)); } else { + const int64_t new_seq = ATOMIC_FAA(&global_seq_, 1); + int64_t new_schema_version = 0; const int64_t participant_count = trans_ctx->get_ready_participant_count(); PartTransTask *participant_list = trans_ctx->get_participant_objs(); const bool is_dml_trans = participant_list->is_dml_trans(); @@ -342,7 +422,7 @@ int ObLogSequencer::handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, K(is_dml_trans), K(local_schema_version), K(new_schema_version)); // sequence } else if (OB_FAIL(trans_ctx->sequence(new_seq, new_schema_version))) { - LOG_ERROR("trans_ctx sequence fail", KR(ret), K(trx_sort_elem), K(new_seq), K(new_schema_version)); + LOG_ERROR("trans_ctx sequence fail", KR(ret), K(new_seq), K(new_schema_version)); } else { monitor.mark_and_get_cost("sequence_done", true); if (OB_FAIL(trans_ctx->wait_data_ready(WAIT_TIMEOUT, stop_flag))) { @@ -385,7 +465,7 @@ int ObLogSequencer::handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, } } - LOG_TRACE("handle_to_be_sequenced_trans_ end", KR(ret), K(trans_id), K(trx_sort_elem)); + LOG_TRACE("handle_sequenced_trans_ end", KR(ret), K(trans_id), KPC(trans_ctx)); } return ret; @@ -467,6 +547,10 @@ int ObLogSequencer::handle_global_hb_part_trans_task_(PartTransTask &part_trans_ last_global_checkpoint_ = cur_global_checkpoint; // udpate current checkpoint ATOMIC_STORE(&global_checkpoint_, global_checkpoint); + if (global_checkpoint > cur_global_checkpoint) { + // signal push_ready_trans_to_seq_queue_ + checkpoint_cond_.signal(); + } LOG_DEBUG("handle_global_hb_part_trans_task_", K(part_trans_task), K(last_global_checkpoint_), K(global_checkpoint_), "delay", NTS_TO_DELAY(global_checkpoint_)); @@ -758,13 +842,18 @@ int 
ObLogSequencer::handle_participants_ready_trans_(const bool is_dml_trans, if (OB_SUCC(ret)) { TrxSortElem &trx_sort_elem = trans_ctx->get_trx_sort_elem(); - ObByteLockGuard guard(trans_queue_lock_); - trans_queue_.push(trx_sort_elem); + { + ObByteLockGuard guard(trans_queue_lock_); + ready_trans_queue_.push(trx_sort_elem); + } + // signal push_ready_trans_to_seq_queue_ + ready_queue_cond_.signal(); - _DSTAT("[TRANS_QUEUE] TENANT_ID=%lu TRANS_ID=%s QUEUE_SIZE=%lu IS_DML=%d", + _DSTAT("[TRANS_QUEUE] TENANT_ID=%lu TRANS_ID=%s QUEUE_SIZE=(%lu/%ld) IS_DML=%d", tenant_id, to_cstring(trx_sort_elem), - trans_queue_.size(), + ready_trans_queue_.size(), + seq_trans_queue_.get_curr_total(), is_dml_trans); } } diff --git a/src/logservice/libobcdc/src/ob_log_sequencer1.h b/src/logservice/libobcdc/src/ob_log_sequencer1.h index eca070b29f..d8c5a6bd4c 100644 --- a/src/logservice/libobcdc/src/ob_log_sequencer1.h +++ b/src/logservice/libobcdc/src/ob_log_sequencer1.h @@ -60,6 +60,7 @@ public: dml_part_trans_task_count_ = 0; hb_part_trans_task_count_ = 0; queue_part_trans_task_count_ = 0; + ready_trans_count_ = 0; sequenced_trans_count_ = 0; } int64_t total_part_trans_task_count_ CACHE_ALIGNED; @@ -67,6 +68,7 @@ public: int64_t dml_part_trans_task_count_ CACHE_ALIGNED; int64_t hb_part_trans_task_count_ CACHE_ALIGNED; int64_t queue_part_trans_task_count_ CACHE_ALIGNED; + int64_t ready_trans_count_ CACHE_ALIGNED; int64_t sequenced_trans_count_ CACHE_ALIGNED; }; @@ -112,7 +114,10 @@ public: public: int start(); void stop(); - void mark_stop_flag() { SequencerThread::mark_stop_flag(); } + void mark_stop_flag() { + SequencerThread::mark_stop_flag(); + lib::ThreadPool::stop(); + } int push(PartTransTask *task, volatile bool &stop_flag); void get_task_count(SeqStatInfo &stat_info); int64_t get_thread_num() const { return SequencerThread::get_thread_num(); } @@ -136,11 +141,16 @@ private: static const int64_t WAIT_TIMEOUT = 10 * _SEC_; typedef libobcdc::TransCtxSortElement TrxSortElem; 
typedef libobcdc::TransCtxSortElement::TransCtxCmp TrxCmp; - typedef std::priority_queue, TrxCmp> TransQueue; + typedef std::priority_queue, TrxCmp> ReadyTransQueue; + typedef ObFixedQueue SeqTransQueue; private: void run1() final; - int handle_to_be_sequenced_trans_(TrxSortElem &trx_sort_elem, + // try to push transactions whose trans_version is not greater than the global checkpoint into seq_trans_queue + int push_ready_trans_to_seq_queue_(); + int handle_trans_in_seq_queue_(); + int handle_sequenced_trans_( + TransCtx *trans_ctx, volatile bool &stop_flag); int handle_global_hb_part_trans_task_(PartTransTask &part_trans_task, volatile bool &stop_flag); @@ -209,8 +219,14 @@ private: int64_t last_global_checkpoint_ CACHE_ALIGNED; uint64_t global_seq_ CACHE_ALIGNED; uint64_t br_committer_queue_seq_ CACHE_ALIGNED; - TransQueue trans_queue_; + // Store assembled distributed transactions + ReadyTransQueue ready_trans_queue_; common::ObByteLock trans_queue_lock_; + // Store transactions that can be outputted (trans commit_version not greater than the global checkpoint). 
+ SeqTransQueue seq_trans_queue_; + common::ObCond checkpoint_cond_; + common::ObCond ready_queue_cond_; + common::ObCond seq_queue_cond_; // Counting the number of partitioned tasks owned by Sequencer int64_t total_part_trans_task_count_ CACHE_ALIGNED; diff --git a/src/logservice/libobcdc/src/ob_log_storager.cpp b/src/logservice/libobcdc/src/ob_log_storager.cpp index 8b0f9c6544..2ec8b425cb 100644 --- a/src/logservice/libobcdc/src/ob_log_storager.cpp +++ b/src/logservice/libobcdc/src/ob_log_storager.cpp @@ -83,6 +83,7 @@ int ObLogStorager::init(const int64_t thread_num, void ObLogStorager::destroy() { + stop(); if (inited_) { LOG_INFO("store_service destroy begin"); StoragerThread::destroy(); @@ -118,6 +119,7 @@ int ObLogStorager::start() void ObLogStorager::stop() { + mark_stop_flag(); if (inited_) { StoragerThread::stop(); LOG_INFO("stop storager threads succ", "thread_num", get_thread_num()); @@ -130,11 +132,14 @@ int ObLogStorager::submit(IObLogBatchBufTask *task) const int64_t timeout = 1000000; if (OB_UNLIKELY(! inited_)) { - LOG_ERROR("ObLogStorager has not been initialized"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogStorager has not been initialized", KR(ret)); } else if (OB_UNLIKELY(! task->is_valid())) { - LOG_ERROR("invalid arguments", KPC(task)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid arguments", KR(ret), KPC(task)); + } else if (OB_UNLIKELY(is_stoped())) { + ret = OB_IN_STOP_STATE; + LOG_INFO("obcdc storager is in stop state", KR(ret)); } else { int64_t sub_task_count = task->get_subtask_count(); uint64_t hash_value = ATOMIC_FAA(&round_value_, 1); @@ -189,6 +194,9 @@ int ObLogStorager::handle(void *data, const int64_t thread_index, volatile bool } else if (OB_ISNULL(task) || OB_UNLIKELY(! 
task->is_valid())) { LOG_ERROR("invalid arguments", KPC(task)); ret = OB_INVALID_ARGUMENT; + } else if (OB_UNLIKELY(is_stoped())) { + ret = OB_IN_STOP_STATE; + LOG_INFO("obcdc storager is in stop state", KR(ret)); } else { int64_t sub_task_count = task->get_subtask_count(); @@ -361,7 +369,7 @@ void ObLogStorager::print_task_count_() int ret = OB_SUCCESS; int64_t total_thread_num = get_thread_num(); - for (int64_t idx = 0; OB_SUCC(ret) && idx < total_thread_num; ++idx) { + for (int64_t idx = 0; OB_SUCC(ret) && idx < total_thread_num && ! is_stoped(); ++idx) { int64_t task_count = 0; if (OB_FAIL(get_task_num(idx, task_count))) { LOG_ERROR("get_task_num fail", K(ret)); diff --git a/src/logservice/libobcdc/src/ob_log_systable_helper.cpp b/src/logservice/libobcdc/src/ob_log_systable_helper.cpp index e08cf7df2f..37f59dc6f3 100644 --- a/src/logservice/libobcdc/src/ob_log_systable_helper.cpp +++ b/src/logservice/libobcdc/src/ob_log_systable_helper.cpp @@ -747,8 +747,7 @@ int ObLogSysTableHelper::init(SvrProvider &svr_provider, || OB_ISNULL(mysql_user) || OB_ISNULL(mysql_password) || OB_ISNULL(mysql_db)) { - LOG_ERROR("invalid arguments", K(access_systable_helper_thread_num), K(mysql_user), - K(mysql_password), K(mysql_db)); + LOG_ERROR("invalid arguments", K(access_systable_helper_thread_num), K(mysql_user), K(mysql_db)); ret = OB_INVALID_ARGUMENT; } else { int64_t max_thread_num = access_systable_helper_thread_num; @@ -785,7 +784,7 @@ int ObLogSysTableHelper::init(SvrProvider &svr_provider, thread_counter_ = 0; inited_ = true; - LOG_INFO("init systable helper succ", K(mysql_user_), K(mysql_password_), K(mysql_db_), + LOG_INFO("init systable helper succ", K(mysql_user_), K(mysql_db_), K(access_systable_helper_thread_num)); } } @@ -1502,7 +1501,7 @@ int ObLogSysTableHelper::change_to_next_server_(const int64_t svr_idx, ObLogMySQ } } else if (OB_FAIL(conn_config.reset(svr, mysql_user_, mysql_password_, mysql_db_, mysql_connect_timeout_sec, mysql_query_timeout_sec))) { - 
LOG_ERROR("reset mysql config fail", KR(ret), K(svr), K(mysql_user_), K(mysql_password_), + LOG_ERROR("reset mysql config fail", KR(ret), K(svr), K(mysql_user_), K(mysql_db_), K(mysql_connect_timeout_sec), K(mysql_query_timeout_sec)); } else { LOG_INFO("connect to next mysql server", "cur_server", conn.get_server(), diff --git a/src/logservice/libobcdc/src/ob_log_task_pool.h b/src/logservice/libobcdc/src/ob_log_task_pool.h index 71fa36e02e..a9e7354031 100644 --- a/src/logservice/libobcdc/src/ob_log_task_pool.h +++ b/src/logservice/libobcdc/src/ob_log_task_pool.h @@ -98,12 +98,13 @@ public: // Should provide allocator, and the size of prealloc task number. int init(common::ObIAllocator *task_alloc, const int64_t prealloc_pool_size, - const int64_t trans_task_page_size, const bool allow_dynamic_alloc, const int64_t prealloc_page_count) { int ret = common::OB_SUCCESS; const int64_t start_ts = get_timestamp(); + const int64_t trans_task_page_size = OB_MALLOC_NORMAL_BLOCK_SIZE; + if (OB_UNLIKELY(inited_)) { ret = common::OB_INIT_TWICE; OBLOG_LOG(WARN, "already init", KR(ret)); diff --git a/src/logservice/libobcdc/src/ob_log_timezone_info_getter.cpp b/src/logservice/libobcdc/src/ob_log_timezone_info_getter.cpp index 61928ddd0d..83e6f687dc 100644 --- a/src/logservice/libobcdc/src/ob_log_timezone_info_getter.cpp +++ b/src/logservice/libobcdc/src/ob_log_timezone_info_getter.cpp @@ -507,8 +507,8 @@ int ObCDCTimeZoneInfoGetter::refresh_tenant_timezone_info_map_( SMART_VAR(ObMySQLProxy::MySQLResult, res) { sqlclient::ObMySQLResult *result = nullptr; if (OB_ISNULL(mysql_proxy_)) { - LOG_ERROR("mysql_proxy_ is null", K(mysql_proxy_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("mysql_proxy_ is null", KR(ret), K(mysql_proxy_)); } else if (! 
need_fetch_timezone_info_by_tennat_()) { if (OB_FAIL(mysql_proxy_->read(res, ObTimeZoneInfoManager::FETCH_TZ_INFO_SQL))) { LOG_WARN("fail to execute sql", KR(ret)); @@ -541,7 +541,8 @@ int ObCDCTimeZoneInfoGetter::refresh_all_tenant_timezone_info_() int ret = OB_SUCCESS; if (OB_ISNULL(mysql_proxy_)) { - LOG_ERROR("mysql_proxy_ is null", K(mysql_proxy_)); + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("mysql_proxy_ is null", KR(ret), K(mysql_proxy_)); } else { // Requires locking to prevent multi-threaded access: formatter and ObCDCTimeZoneInfoGetter query threads themselves SpinWLockGuard guard(lock_); @@ -567,8 +568,8 @@ int ObCDCTimeZoneInfoGetter::query_timezone_info_version_( bool done = false; if (OB_ISNULL(systable_helper_)) { - LOG_ERROR("systable_helper_ is null", K(systable_helper_)); ret = OB_ERR_UNEXPECTED; + LOG_ERROR("systable_helper_ is null", KR(ret), K(systable_helper_)); } else { while (! done && OB_SUCC(ret) && ! stop_flag_) { if (OB_FAIL(systable_helper_->query_timezone_info_version(tenant_id, timezone_info_version))) { diff --git a/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.cpp b/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.cpp index 435b46d8b7..31a1024fe4 100644 --- a/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.cpp +++ b/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.cpp @@ -18,6 +18,7 @@ #include "ob_log_config.h" // TCONF #include "ob_log_instance.h" // TCTX #include "ob_log_trans_msg_sorter.h" // IObLogTransMsgSorter +#include "ob_cdc_auto_config_mgr.h" // CDC_CFG_MGR namespace oceanbase { @@ -28,6 +29,7 @@ TransDispatchCtx::TransDispatchCtx() : trans_id_(), total_part_count_(0), dispatched_part_count_(0), + is_dispatching_(false), normal_priority_part_budget_arr_(), high_priority_part_budget_arr_() {} @@ -39,6 +41,7 @@ int TransDispatchCtx::init(TransCtx &trans) trans_id_ = trans.get_trans_id(); total_part_count_ = trans.get_ready_participant_count(); PartTransTask *part_trans_task = 
trans.get_participant_objs(); + is_dispatching_ = false; while(OB_SUCC(ret) && OB_NOT_NULL(part_trans_task)) { PartTransTask *next_part_trans = part_trans_task->next_task(); @@ -59,6 +62,7 @@ void TransDispatchCtx::reset() trans_id_.reset(); total_part_count_ = 0; dispatched_part_count_ = 0; + is_dispatching_ = false; normal_priority_part_budget_arr_.reset(); high_priority_part_budget_arr_.reset(); } @@ -83,6 +87,7 @@ int TransDispatchCtx::reblance_budget( const int64_t total_count = get_total_need_reblance_part_cnt_(); const int64_t memory_limit_ratio_for_output_by_sql_operaiton = TCONF.redo_dispatched_memory_limit_exceed_ratio; + // won't use this cause OBCDC will dispatch more if detect skew part and budget is 0; if (total_count <= 0) { ret = OB_ERR_UNEXPECTED; @@ -93,7 +98,7 @@ int TransDispatchCtx::reblance_budget( // all part should dispatch at least one redo, so budget_value should + 1 in case of budget is 0 int64_t dispatch_budget = total_budget > 0 ? total_budget : 0; - const int64_t average_budget = current_used_memory >= redo_memory_limit * memory_limit_ratio_for_output_by_sql_operaiton ? + const int64_t average_budget = current_used_memory >= redo_memory_limit ? 0 : (dispatch_budget / total_count) + 1; set_normal_priority_budget_(average_budget); LOG_DEBUG("reblance budget result:", K(total_budget), K(total_count), K(average_budget)); @@ -111,18 +116,36 @@ int64_t TransDispatchCtx::get_total_need_reblance_part_cnt_() const void TransDispatchCtx::set_normal_priority_budget_(const int64_t &average_budget) { + const static int64_t PRINT_STAT_INTERVAL = 10 * _SEC_; + const bool need_pause = TCTX.need_pause_redo_dispatch(); + IObLogTransMsgSorter *msg_sorter = TCTX.trans_msg_sorter_; + const bool is_new_trans_can_dispatch = (! 
is_dispatching_ && average_budget > 0 && !need_pause); + for(int64_t i = 0; i < normal_priority_part_budget_arr_.count(); i++) { PartTransDispatchBudget &budget = normal_priority_part_budget_arr_[i]; PartTransTask *part_trans_task = budget.part_trans_task_; const static int64_t PRINT_STAT_INTERVAL = 10 * _SEC_; - IObLogTransMsgSorter *msg_sorter = TCTX.trans_msg_sorter_; - if (average_budget <= 0 + if (is_new_trans_can_dispatch) { + // only dispatch 1 redo for each part of trans for the first round dispatch + budget.reset_budget(1); + } else if (need_pause || ! is_dispatching_) { + if (REACH_TIME_INTERVAL(PRINT_STAT_INTERVAL)) { + LOG_INFO("[NOTICE][REDO_DISPATCH][PAUSE]", + K(budget), + K(average_budget), + K_(is_dispatching), + "trans_id", part_trans_task->get_trans_id(), + "tls_id", part_trans_task->get_tls_id(), + "redo_sorted_progress", part_trans_task->get_sorted_redo_list().sorted_progress_); + } + budget.reset_budget(0); + } else if (average_budget <= 0 && OB_NOT_NULL(part_trans_task) && OB_NOT_NULL(msg_sorter) && (part_trans_task->get_trans_id() == msg_sorter->get_cur_sort_trans_id()) // wait last trans handled in sorter && part_trans_task->is_dispatched_redo_be_sorted()) { - const int64_t extra_redo_dispatch_size = TCONF.extra_redo_dispatch_memory_size; + const int64_t extra_redo_dispatch_size = CDC_CFG_MGR.get_extra_redo_dispatch_memory_size(); if (REACH_TIME_INTERVAL(PRINT_STAT_INTERVAL)) { LOG_INFO("[NOTICE][REDO_DISPATCH][DATA_SKEW] budget usedup but dispatched_redo all sorted, use extra_redo budget", @@ -138,6 +161,9 @@ void TransDispatchCtx::set_normal_priority_budget_(const int64_t &average_budget budget.reset_budget(average_budget); } } + if (is_new_trans_can_dispatch) { + is_dispatching_ = true; + } } diff --git a/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.h b/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.h index 33c3e21b10..2dd0f7e67a 100644 --- a/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.h +++ 
b/src/logservice/libobcdc/src/ob_log_trans_dispatch_ctx.h @@ -120,6 +120,9 @@ private: transaction::ObTransID trans_id_; int64_t total_part_count_; int64_t dispatched_part_count_; + // is_dispatching_ = false means not dispatch any redo, + // will only dispatch one redo for the first round + bool is_dispatching_; // assume PartTransTask that all redo dispatched will be removed from arr while // RedoDispatcher::dispatch_part_redo_with_budget PartBudgetArray normal_priority_part_budget_arr_; diff --git a/src/logservice/libobcdc/src/ob_log_trans_msg_sorter.cpp b/src/logservice/libobcdc/src/ob_log_trans_msg_sorter.cpp index 5ec4aaa5c0..e341446415 100644 --- a/src/logservice/libobcdc/src/ob_log_trans_msg_sorter.cpp +++ b/src/logservice/libobcdc/src/ob_log_trans_msg_sorter.cpp @@ -34,8 +34,8 @@ ret = (var).func(args); \ if (err_no == ret) { \ retry_cnt ++; \ - if (0 == retry_cnt % 1000) { \ - LOG_WARN(#func " retry for too many times", KP(&var), K(var), K(retry_cnt)); \ + if (0 == retry_cnt % 12000) { \ + LOG_WARN(#func " retry for too many times", K(retry_cnt), KP(&var), K(var)); \ } \ /* sleep 5 ms*/ \ ob_usleep(5 * 1000); \ diff --git a/src/logservice/libobcdc/src/ob_log_trans_redo_dispatcher.cpp b/src/logservice/libobcdc/src/ob_log_trans_redo_dispatcher.cpp index 7f4bf6dbdc..3628d679ac 100644 --- a/src/logservice/libobcdc/src/ob_log_trans_redo_dispatcher.cpp +++ b/src/logservice/libobcdc/src/ob_log_trans_redo_dispatcher.cpp @@ -18,6 +18,7 @@ #include "ob_log_instance.h" #include "ob_log_trans_redo_dispatcher.h" +#include "ob_cdc_auto_config_mgr.h" namespace oceanbase { @@ -72,7 +73,7 @@ void ObLogTransRedoDispatcher::destroy() void ObLogTransRedoDispatcher::configure(const ObLogConfig &config) { - const int64_t redo_mem_limit = config.redo_dispatcher_memory_limit.get(); + const int64_t redo_mem_limit = CDC_CFG_MGR.get_redo_dispatcher_memory_limit(); ATOMIC_SET(&redo_memory_limit_, redo_mem_limit); LOG_INFO("[CONFIG][REDO_DISPATCHER]", "redo_dispatcher_memory_limit", 
redo_mem_limit, "to_size", SIZE_TO_STR(redo_mem_limit)); } @@ -171,8 +172,10 @@ int ObLogTransRedoDispatcher::dispatch_by_turn_(TransCtx &trans, volatile bool & LOG_ERROR("failed to batch dispatch redo", KR(ret), K_(enable_sort_by_seq_no), K(trans), K(stop_flag)); } } else if (!trans_dispatch_ctx_.is_trans_dispatched()) { - ob_usleep(100); // sleep 100 us - if (OB_UNLIKELY(++retry_cnt % 1000 == 0)) { + ob_usleep(200); // sleep 200 us + if (OB_UNLIKELY(++retry_cnt % 50000 == 0)) { + // print at most once every 10 sec (50000 retries * 200 us sleep) + // TODO: simplify log content LOG_WARN("trans dispatch_by_turn for too many times", KR(ret), K(retry_cnt), K(trans), K_(trans_dispatch_ctx)); } } else { diff --git a/src/logservice/libobcdc/src/ob_ls_worker.cpp b/src/logservice/libobcdc/src/ob_ls_worker.cpp index 6be7765e9e..9dd5e92218 100644 --- a/src/logservice/libobcdc/src/ob_ls_worker.cpp +++ b/src/logservice/libobcdc/src/ob_ls_worker.cpp @@ -193,7 +193,9 @@ int ObLSWorker::dispatch_fetch_task(LSFetchCtx &task, const char *dispatch_reaso LOG_DEBUG("[STAT] [STREAM_WORKER] [RECYCLE_FETCH_TASK]", "task", &task, K(task)); if (OB_FAIL(dead_pool_->push(&task))) { - LOG_DEBUG("push task into dead pool fail", KR(ret), K(task)); + if (OB_IN_STOP_STATE != ret) { + LOG_DEBUG("push task into dead pool fail", KR(ret), K(task)); + } } } else if (is_integrated_fetching_mode(task.get_fetching_mode())) { ObAddr request_svr; @@ -240,7 +242,9 @@ int ObLSWorker::dispatch_fetch_task(LSFetchCtx &task, const char *dispatch_reaso "dispatch to idle pool", "task", &task, K(task)); if (OB_FAIL(idle_pool_->push(&task))) { - LOG_ERROR("push into idle pool fail", KR(ret), K(task)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push into idle pool fail", KR(ret), K(task)); + } } } else { LOG_DEBUG("[STAT] [STREAM_WORKER] [DISPATCH_FETCH_TASK] dispatch to next server", @@ -286,7 +290,9 @@ int ObLSWorker::dispatch_stream_task(FetchStream &task, const char *from_mod) // Rotating the task of fetching log streams to work threads if 
(OB_FAIL(StreamWorkerThread::push(&task, hash_val))) { - LOG_ERROR("push stream task into thread queue fail", KR(ret)); + if (OB_IN_STOP_STATE != ret) { + LOG_ERROR("push stream task into thread queue fail", KR(ret)); + } } } return ret; @@ -331,8 +337,9 @@ int ObLSWorker::handle(void *data, ret = OB_INVALID_ARGUMENT; } // If the stream task is currently suspended, the task is put to sleep - // DDL tasks are exempt from suspend and require always processing - else if (OB_UNLIKELY(is_paused) && ! task->is_sys_log_stream()) { + // 1. DDL tasks are exempt from suspend and require always processing + // 2. ready rpc(response already return) should always processing + else if (OB_UNLIKELY(is_paused) && ! (task->is_sys_log_stream() || task->is_rpc_ready())) { LOG_DEBUG("[STAT] [STREAM_WORKER] [HIBERNATE_STREAM_TASK_ON_PAUSE]", K(task)); if (OB_FAIL(hibernate_stream_task(*task, "PausedFetcher"))) { diff --git a/src/logservice/libobcdc/src/ob_map_queue_thread.h b/src/logservice/libobcdc/src/ob_map_queue_thread.h index 3579d45dd2..27780f1e0e 100644 --- a/src/logservice/libobcdc/src/ob_map_queue_thread.h +++ b/src/logservice/libobcdc/src/ob_map_queue_thread.h @@ -156,7 +156,7 @@ int ObMapQueueThread::init(const int64_t thread_num, const char } thread_num_ = thread_num; - stop_flag_ = true; + stop_flag_ = false; inited_ = true; } @@ -185,7 +185,7 @@ int ObMapQueueThread::start() if (OB_UNLIKELY(! 
inited_)) { LIB_LOG(ERROR, "not inited"); ret = OB_NOT_INIT; - } else if (stop_flag_) { + } else { stop_flag_ = false; for (int64_t index = 0; OB_SUCCESS == ret && index < thread_num_; index++) { @@ -204,8 +204,8 @@ int ObMapQueueThread::start() template void ObMapQueueThread::stop() { + mark_stop_flag(); if (inited_) { - stop_flag_ = true; for (int64_t index = 0; index < thread_num_; index++) { ThreadConf &tc = tc_[index]; @@ -274,8 +274,8 @@ int ObMapQueueThread::pop(const int64_t thread_index, void *&dat LIB_LOG(ERROR, "not init"); ret = OB_NOT_INIT; } else if (OB_UNLIKELY(thread_index < 0) || OB_UNLIKELY(thread_index >= thread_num_)) { - LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); ret = OB_ERR_UNEXPECTED; + LIB_LOG(ERROR, "invalid thread index", KR(ret), K(thread_index), K(thread_num_)); } else { ret = tc_[thread_index].queue_.pop(data); } @@ -305,7 +305,7 @@ int ObMapQueueThread::next_task_(const int64_t index, void *&tas ret = OB_ERR_UNEXPECTED; } else { ThreadConf &tc = tc_[index]; - while (! stop_flag_ && OB_SUCCESS == ret) { + while (! is_stoped() && OB_SUCCESS == ret) { task = NULL; if (OB_FAIL(tc.queue_.pop(task))) { @@ -317,8 +317,8 @@ int ObMapQueueThread::next_task_(const int64_t index, void *&tas LIB_LOG(ERROR, "pop task from queue fail", KR(ret)); } } else if (OB_ISNULL(task)) { - LIB_LOG(ERROR, "pop invalid task", K(task)); ret = OB_ERR_UNEXPECTED; + LIB_LOG(ERROR, "pop invalid task", KR(ret), K(task)); } else { break; } @@ -338,14 +338,14 @@ int ObMapQueueThread::push(void *data, const uint64_t hash_val) int ret = OB_SUCCESS; if (OB_UNLIKELY(! 
inited_)) { - LIB_LOG(ERROR, "not init"); ret = OB_NOT_INIT; + LIB_LOG(ERROR, "not init", KR(ret), K_(inited)); } else if (OB_UNLIKELY(is_stoped())) { ret = OB_IN_STOP_STATE; LIB_LOG(INFO, "thread pool is not running", KR(ret), K_(stop_flag)); } else if (OB_ISNULL(data)) { - LIB_LOG(ERROR, "invalid argument", K(data)); ret = OB_INVALID_ARGUMENT; + LIB_LOG(ERROR, "invalid argument", KR(ret), K(data)); } else { int64_t target_index = static_cast(hash_val % thread_num_); ThreadConf &tc = tc_[target_index]; diff --git a/src/logservice/libobcdc/tests/demo/obcdc_demo.cpp b/src/logservice/libobcdc/tests/demo/obcdc_demo.cpp index be833c961b..65608171f6 100644 --- a/src/logservice/libobcdc/tests/demo/obcdc_demo.cpp +++ b/src/logservice/libobcdc/tests/demo/obcdc_demo.cpp @@ -12,6 +12,7 @@ * obcdc_demo */ +#include #include #include "include/libobcdc/libobcdc.h" #include "include/libobcdc/ob_errno.h" @@ -24,9 +25,11 @@ typedef IBinlogRecord Record; #define LOG(msg) \ do { \ - std::cout << msg << std::endl; \ + std::cout << "[OBCDC][DEMO] " << msg << std::endl; \ } while (0) +volatile bool stop_flag = false; + int create_obcdc_instance(ObCDCFactory &cdc_factory, IObCDCInstance *&obcdc_instance) { int ret = OB_SUCCESS; @@ -42,6 +45,7 @@ int create_obcdc_instance(ObCDCFactory &cdc_factory, IObCDCInstance *&obcdc_inst void destroy_obcdc_instance(ObCDCFactory &cdc_factory, IObCDCInstance *obcdc_instance) { obcdc_instance->stop(); + obcdc_instance->destroy(); cdc_factory.deconstruct(obcdc_instance); } @@ -71,6 +75,8 @@ int fetch_next_cdc_record(IObCDCInstance &obcdc_instance, Record *record) } else if (NULL == record) { ret = OB_ERR_UNEXPECTED; LOG("invalid record"); + } else { + LOG("FETCH_RECORD SUCC"); } return ret; @@ -91,9 +97,25 @@ int handle_cdc_record(Record *record) return ret; } +void handle_signal(int signo) +{ + switch (signo) + { + case SIGTERM: + case SIGHUP: + LOG("[SIGNAL] obcdc recv SIG TERM, will exit progress"); + stop_flag = true; + break; + default: + 
LOG("[SIGNAL] obcdc recv unknown signal, skip"); + break; + } +} + int main(int argc, char **argv) { int ret = OB_SUCCESS; + signal(SIGTERM, handle_signal); ObCDCFactory cdc_factory; IObCDCInstance *obcdc_instance = NULL; @@ -106,7 +128,7 @@ int main(int argc, char **argv) if (OB_SUCCESS != init_obcdc_instance(*obcdc_instance)) { LOG("[ERROR] obcdc_instance init failed"); } else { - while(OB_SUCCESS == ret) { + while(OB_SUCCESS == ret && ! stop_flag) { Record *record = NULL; if (OB_SUCCESS != (ret = fetch_next_cdc_record(*obcdc_instance, record))) { if (OB_TIMEOUT == ret) { diff --git a/src/logservice/libobcdc/tests/ob_binlog_record_printer.h b/src/logservice/libobcdc/tests/ob_binlog_record_printer.h index 488b7ada1e..4990adb32e 100644 --- a/src/logservice/libobcdc/tests/ob_binlog_record_printer.h +++ b/src/logservice/libobcdc/tests/ob_binlog_record_printer.h @@ -40,6 +40,7 @@ public: virtual ~IObBinlogRecordPrinter() {} public: + virtual bool need_print_binlog_record() = 0; virtual int print_binlog_record(IBinlogRecord *br) = 0; }; @@ -55,6 +56,8 @@ public: virtual ~ObBinlogRecordPrinter(); public: + virtual bool need_print_binlog_record() + { return enable_print_console_ || heartbeat_file_fd_ > 0 || data_file_fd_ > 0; }; virtual int print_binlog_record(IBinlogRecord *br); public: diff --git a/src/logservice/libobcdc/tests/obcdc_main.cpp b/src/logservice/libobcdc/tests/obcdc_main.cpp index 50490c8490..08710bd547 100644 --- a/src/logservice/libobcdc/tests/obcdc_main.cpp +++ b/src/logservice/libobcdc/tests/obcdc_main.cpp @@ -17,6 +17,7 @@ #include "obcdc_main.h" #include "ob_log_instance.h" // ObLogInstance #include "share/ob_time_zone_info_manager.h" // FETCH_TZ_INFO_SQL +#include "ob_log_trans_ctx.h" #include // fprintf #include // getopt_long @@ -129,7 +130,7 @@ int ObLogMain::parse_args_(int argc, char **argv) // option variables int opt = -1; - const char *opt_string = "iIvcdD:f:hH:oVt:rR:OxmT:Pp:"; + const char *opt_string = "iIvcdD:f:hH:oVt:rR:OxmT:Pp:s"; 
struct option long_opts[] = { {"print_dml_checksum", 0, NULL, 'c'}, @@ -153,6 +154,7 @@ int ObLogMain::parse_args_(int argc, char **argv) {"output_br_detail", 0, NULL, 'i'}, {"output_br_special_detail", 0, NULL, 'I'}, {"parse_timezone_info", 0, NULL, 'p'}, + {"delay_release", 0, NULL, 's'}, {0, 0, 0, 0} }; @@ -253,6 +255,11 @@ int ObLogMain::parse_args_(int argc, char **argv) break; } + case 's': { + delay_release_ = true; + break; + } + case 'I': { // output special detail info of binlog record, default off // Such as, ObTraceInfo @@ -344,6 +351,9 @@ int ObLogMain::start() if (OB_FAIL(instance->init_with_start_tstamp_usec(config_file_, start_timestamp_usec_, handle_error))) { LOG_ERROR("init oblog fail", K(ret), K_(config_file), K_(start_timestamp_usec), KP(handle_error)); } else { + _LOG_INFO("sizeof TransCtx: %lu", sizeof(TransCtx)); + _LOG_INFO("sizeof PartTransTask: %lu", sizeof(PartTransTask)); + _LOG_INFO("sizeof LogEntryTask: %lu", sizeof(ObLogEntryTask)); // do nothing } @@ -378,7 +388,7 @@ void ObLogMain::run() { if (inited_ && NULL != obcdc_instance_) { int ret = OB_SUCCESS; - int64_t end_time = ::oceanbase::common::ObTimeUtility::current_time() + run_time_us_; + int64_t end_time = get_timestamp() + run_time_us_; while (OB_SUCCESS == ret && ! 
stop_flag_) { IBinlogRecord *br = NULL; @@ -386,17 +396,23 @@ void ObLogMain::run() if (OB_SUCC(ret)) { if (OB_FAIL(verify_record_info_(br))) { - LOG_ERROR("verify_record_info_ fail", K(ret), K(br)); + LOG_ERROR("verify_record_info_ fail", KR(ret), K(br)); + } else if (br_printer_.need_print_binlog_record()) { + // output binlog record + if (OB_FAIL(br_printer_.print_binlog_record(br))) { + LOG_ERROR("print_binlog_record fail", KR(ret)); + } } - // output binlog record - else if (OB_FAIL(br_printer_.print_binlog_record(br))) { - LOG_ERROR("print_binlog_record fail", K(ret)); - } else { + if (OB_SUCC(ret)) { + if (OB_UNLIKELY(delay_release_)) { + static const int64_t delay_release_time = 200; // 200 us, output 5K RPS at most + usleep(delay_release_time); + } obcdc_instance_->release_record(br); br = NULL; } } else if (OB_TIMEOUT == ret) { - int64_t left_time = end_time - ::oceanbase::common::ObTimeUtility::current_time(); + int64_t left_time = end_time - get_timestamp(); if (run_time_us_ > 0 && left_time <= 0) { ret = OB_TIMEOUT; } else { @@ -410,7 +426,7 @@ void ObLogMain::run() stop_flag_ = true; ret = OB_SUCCESS; } else { - LOG_ERROR("next_record fail", K(ret)); + LOG_ERROR("next_record fail", KR(ret)); } } } @@ -429,14 +445,14 @@ int ObLogMain::verify_record_info_(IBinlogRecord *br) ObLogBR *oblog_br = NULL; if (OB_UNLIKELY(! 
inited_)) { - LOG_ERROR("ObLogMain has not inited"); ret = OB_NOT_INIT; + LOG_ERROR("ObLogMain has not inited", KR(ret)); } else if (OB_ISNULL(br)) { - LOG_ERROR("br is null"); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("br is null", KR(ret)); } else if (OB_ISNULL(oblog_br = reinterpret_cast(br->getUserData()))) { - LOG_ERROR("get user data fail", K(br), K(oblog_br)); ret = OB_INVALID_ARGUMENT; + LOG_ERROR("get user data fail", KR(ret), K(br), K(oblog_br)); } else { // heartbeat, updtae last_heartbeat_timestamp_usec_ int64_t checkpoint_timestamp_usec = OB_INVALID_TIMESTAMP; diff --git a/src/logservice/libobcdc/tests/obcdc_main.h b/src/logservice/libobcdc/tests/obcdc_main.h index 0d0c9c4a3b..d885987d9b 100644 --- a/src/logservice/libobcdc/tests/obcdc_main.h +++ b/src/logservice/libobcdc/tests/obcdc_main.h @@ -79,6 +79,7 @@ private: bool verify_mode_; bool enable_reentrant_; bool output_br_detail_; + bool delay_release_; bool output_br_special_detail_; int64_t start_timestamp_usec_; uint64_t tenant_id_; diff --git a/src/logservice/logfetcher/ob_log_fetch_log_rpc.h b/src/logservice/logfetcher/ob_log_fetch_log_rpc.h index e0c5f17245..79e072256e 100644 --- a/src/logservice/logfetcher/ob_log_fetch_log_rpc.h +++ b/src/logservice/logfetcher/ob_log_fetch_log_rpc.h @@ -251,6 +251,7 @@ public: int64_t get_flying_request_count(); void print_flying_request_list(); + bool is_rpc_ready() const { return State::READY == state_; } private: int alloc_rpc_request_(const share::ObLSID &ls_id, diff --git a/src/logservice/logfetcher/ob_log_ls_fetch_stream.h b/src/logservice/logfetcher/ob_log_ls_fetch_stream.h index 1420dc3b29..1525e00275 100644 --- a/src/logservice/logfetcher/ob_log_ls_fetch_stream.h +++ b/src/logservice/logfetcher/ob_log_ls_fetch_stream.h @@ -128,6 +128,9 @@ public: int64_t get_fetch_task_count() const { return 1; } + // is rpc response ready + bool is_rpc_ready() const { return fetch_log_arpc_.is_rpc_ready(); } + int64_t get_rpc_timeout() const { return g_rpc_timeout; } 
int alloc_fetch_log_srpc(FetchLogSRpc *&fetch_log_srpc); diff --git a/src/logservice/logfetcher/ob_log_start_lsn_locator.cpp b/src/logservice/logfetcher/ob_log_start_lsn_locator.cpp index 230eddb042..3bd24874c7 100644 --- a/src/logservice/logfetcher/ob_log_start_lsn_locator.cpp +++ b/src/logservice/logfetcher/ob_log_start_lsn_locator.cpp @@ -273,7 +273,7 @@ void ObLogStartLSNLocator::run(const int64_t thread_index) } else { WorkerData &data = worker_data_[thread_index]; - while (! stop_flag_ && OB_SUCCESS == ret) { + while (! is_stoped() && OB_SUCCESS == ret) { if (OB_FAIL(do_retrieve_(thread_index, data))) { LOG_ERROR("retrieve request fail", KR(ret), K(thread_index)); } else if (! data.has_valid_req()) { @@ -291,7 +291,7 @@ void ObLogStartLSNLocator::run(const int64_t thread_index) } } - if (stop_flag_) { + if (is_stoped()) { ret = OB_IN_STOP_STATE; } } @@ -324,7 +324,7 @@ int ObLogStartLSNLocator::do_retrieve_(const int64_t thread_index, WorkerData &w int ret = OB_SUCCESS; int64_t batch_count = ATOMIC_LOAD(&g_batch_count); - for (int64_t cnt = 0; OB_SUCC(ret) && ! stop_flag_ && (cnt < batch_count); ++cnt) { + for (int64_t cnt = 0; OB_SUCC(ret) && ! is_stoped() && (cnt < batch_count); ++cnt) { StartLSNLocateReq *request = NULL; StartLSNLocateReq::SvrItem *item = NULL; SvrReq *svr_req = NULL; @@ -475,7 +475,7 @@ int ObLogStartLSNLocator::do_integrated_request_(WorkerData &data) LOG_ERROR("invalid rpc handle", KR(ret), K(rpc_)); } else { for (int64_t idx = 0, cnt = svr_req_list.count(); - ! stop_flag_ && OB_SUCCESS == ret && (idx < cnt); ++idx) { + ! is_stoped() && OB_SUCCESS == ret && (idx < cnt); ++idx) { if (OB_ISNULL(svr_req_list.at(idx))) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("svr request is NULL", KR(ret), K(idx), K(cnt), K(svr_req_list)); @@ -486,7 +486,7 @@ int ObLogStartLSNLocator::do_integrated_request_(WorkerData &data) // 1. 
The number of partitions on a single server may be greater than the maximum number of partitions for a single RPC and needs to be split into multiple requests // 2. Each partition request is removed from the request list as soon as it completes, so each request is split into multiple requests, each starting with the first element // 3. Partition request completion condition: regardless of success, as long as no breakpoint message is returned, the request is considered completed - while (! stop_flag_ && OB_SUCCESS == ret && svr_req.locate_req_list_.count() > 0) { + while (! is_stoped() && OB_SUCCESS == ret && svr_req.locate_req_list_.count() > 0) { // maximum request count int64_t item_cnt_limit = RpcReq::ITEM_CNT_LMT; int64_t req_cnt = std::min(svr_req.locate_req_list_.count(), item_cnt_limit); @@ -495,7 +495,7 @@ int ObLogStartLSNLocator::do_integrated_request_(WorkerData &data) // Note: A separate loop must be used here to ensure that the partition in the retry request is the same "breakpoint partition", // if the requested partition is not the same "breakpoint partition" but a new partition is added, the server will have // to scan the file from the "head" again and the breakpoint information will be invalid. - while (! stop_flag_ && OB_SUCCESS == ret && req_cnt > 0) { + while (! 
is_stoped() && OB_SUCCESS == ret && req_cnt > 0) { // Set different trace ids for different requests ObLogTraceIdGuard trace_guard; @@ -523,7 +523,7 @@ int ObLogStartLSNLocator::do_integrated_request_(WorkerData &data) } } - if (stop_flag_) { + if (is_stoped()) { ret = OB_IN_STOP_STATE; } @@ -536,7 +536,7 @@ int ObLogStartLSNLocator::do_direct_request_(WorkerData &data) DirectReqList &archive_req_lst = data.archive_req_list_; const int64_t lst_cnt = archive_req_lst.count(); for (int64_t idx = lst_cnt - 1; - OB_SUCC(ret) && !stop_flag_ && idx >= 0; --idx) { + OB_SUCC(ret) && !is_stoped() && idx >= 0; --idx) { LSN start_lsn; StartLSNLocateReq *req = archive_req_lst.at(idx); if (OB_ISNULL(req)) { @@ -582,7 +582,7 @@ int ObLogStartLSNLocator::build_request_params_(RpcReq &req, int64_t total_cnt = svr_req.locate_req_list_.count(); req.reset(); - for (int64_t index = 0; OB_SUCC(ret) && ! stop_flag_ && index < req_cnt && index < total_cnt; ++index) { + for (int64_t index = 0; OB_SUCC(ret) && ! is_stoped() && index < req_cnt && index < total_cnt; ++index) { StartLSNLocateReq *request = svr_req.locate_req_list_.at(index); StartLSNLocateReq::SvrItem *svr_item = NULL; @@ -653,7 +653,7 @@ int ObLogStartLSNLocator::do_rpc_and_dispatch_( if (OB_SUCCESS == ret) { // Scanning of arrays in reverse order to support deletion of completed ls requests - for (int64_t idx = request_cnt - 1; OB_SUCC(ret) && ! stop_flag_ && idx >= 0; idx--) { + for (int64_t idx = request_cnt - 1; OB_SUCC(ret) && ! 
is_stoped() && idx >= 0; idx--) { int ls_err = OB_SUCCESS; palf::LSN start_lsn; int64_t start_log_tstamp = OB_INVALID_TIMESTAMP; diff --git a/src/logservice/logfetcher/ob_ls_worker.cpp b/src/logservice/logfetcher/ob_ls_worker.cpp index 10b686aaeb..e54a36cda0 100644 --- a/src/logservice/logfetcher/ob_ls_worker.cpp +++ b/src/logservice/logfetcher/ob_ls_worker.cpp @@ -336,8 +336,9 @@ void ObLSWorker::handle(void *data, volatile bool &stop_flag) LOG_ERROR("invalid task", KR(ret), K(task), K(thread_index)); } // If the stream task is currently suspended, the task is put to sleep - // DDL tasks are exempt from suspend and require always processing - else if (OB_UNLIKELY(is_paused) && ! task->is_sys_log_stream()) { + // 1. DDL tasks are exempt from suspend and require always processing + // 2. ready rpc(response already return) should always processing + else if (OB_UNLIKELY(is_paused) && ! (task->is_sys_log_stream() || task->is_rpc_ready())) { LOG_TRACE("[STAT] [STREAM_WORKER] [HIBERNATE_STREAM_TASK_ON_PAUSE]", K(task)); if (OB_FAIL(hibernate_stream_task(*task, "PausedFetcher"))) { diff --git a/src/logservice/logfetcher/ob_map_queue_thread.h b/src/logservice/logfetcher/ob_map_queue_thread.h index 0f2fbbef6c..14b0b27b0c 100644 --- a/src/logservice/logfetcher/ob_map_queue_thread.h +++ b/src/logservice/logfetcher/ob_map_queue_thread.h @@ -83,7 +83,7 @@ public: void destroy(); int start(); void stop(); - void mark_stop_flag() { ATOMIC_STORE(&stop_flag_, false); } + void mark_stop_flag() { ATOMIC_STORE(&stop_flag_, true); } bool is_stoped() const { return ATOMIC_LOAD(&stop_flag_); } int64_t get_thread_num() const { return thread_num_; } @@ -156,7 +156,7 @@ int ObMapQueueThread::init(const int64_t thread_num, const char } thread_num_ = thread_num; - stop_flag_ = true; + stop_flag_ = false; inited_ = true; } @@ -185,7 +185,7 @@ int ObMapQueueThread::start() if (OB_UNLIKELY(! 
inited_)) { LIB_LOG(ERROR, "not inited"); ret = OB_NOT_INIT; - } else if (stop_flag_) { + } else { stop_flag_ = false; for (int64_t index = 0; OB_SUCCESS == ret && index < thread_num_; index++) { @@ -204,8 +204,8 @@ int ObMapQueueThread::start() template void ObMapQueueThread::stop() { + mark_stop_flag(); if (inited_) { - stop_flag_ = true; for (int64_t index = 0; index < thread_num_; index++) { ThreadConf &tc = tc_[index]; @@ -245,7 +245,7 @@ void ObMapQueueThread::run(const int64_t thread_index) LIB_LOG(ERROR, "invalid thread index", K(thread_index), K(thread_num_)); ret = OB_ERR_UNEXPECTED; } else { - while (! stop_flag_ && OB_SUCCESS == ret) { + while (! is_stoped() && OB_SUCCESS == ret) { void *task = NULL; if (OB_FAIL(next_task_(thread_index, task))) { @@ -305,7 +305,7 @@ int ObMapQueueThread::next_task_(const int64_t index, void *&tas ret = OB_ERR_UNEXPECTED; } else { ThreadConf &tc = tc_[index]; - while (! stop_flag_ && OB_SUCCESS == ret) { + while (! is_stoped() && OB_SUCCESS == ret) { task = NULL; if (OB_FAIL(tc.queue_.pop(task))) { @@ -324,7 +324,7 @@ int ObMapQueueThread::next_task_(const int64_t index, void *&tas } } - if (stop_flag_) { + if (is_stoped()) { ret = OB_IN_STOP_STATE; } } diff --git a/src/share/config/ob_common_config.cpp b/src/share/config/ob_common_config.cpp index a4f78dc291..bbfa1b04a8 100644 --- a/src/share/config/ob_common_config.cpp +++ b/src/share/config/ob_common_config.cpp @@ -147,7 +147,9 @@ int ObBaseConfig::load_from_buffer(const char *config_str, const int64_t config_ } else { (*pp_item)->set_value(value); (*pp_item)->set_version(version); - _LOG_INFO("load config succ, %s=%s", name, value); + if (need_print_config(name)) { + _LOG_INFO("load config succ, %s=%s", name, value); + } } if (OB_SUCCESS == ret) { diff --git a/src/share/config/ob_common_config.h b/src/share/config/ob_common_config.h index 478f05ba17..0bd2d46fc8 100644 --- a/src/share/config/ob_common_config.h +++ b/src/share/config/ob_common_config.h @@ -76,6 +76,7 
@@ public: const int64_t version = 0, const bool check_name = false); int load_from_file(const char *config_file, const int64_t version = 0, const bool check_name = false); int dump2file(const char *config_file) const; + virtual bool need_print_config(const std::string& config_key) const { return true; } private: bool inited_; static const int64_t OB_MAX_CONFIG_LENGTH = 5 * 1024 * 1024; // 5M diff --git a/unittest/libobcdc/test_log_task_pool.cpp b/unittest/libobcdc/test_log_task_pool.cpp index 8fd44a9eab..89cad17a79 100644 --- a/unittest/libobcdc/test_log_task_pool.cpp +++ b/unittest/libobcdc/test_log_task_pool.cpp @@ -71,7 +71,7 @@ TEST(ObLogTransTaskPool, Init) ObLogTransTaskPool pool; - int ret = pool.init(&fifo, part_trans_task_prealloc_count, page_size, true, prealloc_page_count); + int ret = pool.init(&fifo, part_trans_task_prealloc_count, true, prealloc_page_count); EXPECT_EQ(OB_SUCCESS, ret); } @@ -85,7 +85,7 @@ TEST(ObLogTransTaskPool, Function1) ObLogTransTaskPool pool; - int ret = pool.init(&fifo, 1024 * 8, 1024, true, 1024); + int ret = pool.init(&fifo, 1024 * 8, true, 1024); EXPECT_EQ(OB_SUCCESS, ret); MockTransTask **tasks = new MockTransTask*[task_cnt]; @@ -118,7 +118,7 @@ TEST(ObLogTransTaskPool, Function2) ObLogTransTaskPool pool; - int ret = pool.init(&fifo, 1024 * 8, 1024, true, 1024); + int ret = pool.init(&fifo, 1024 * 8, true, 1024); EXPECT_EQ(OB_SUCCESS, ret); MockTransTask **tasks = new MockTransTask*[task_cnt]; diff --git a/unittest/libobcdc/test_ob_cdc_part_trans_resolver.cpp b/unittest/libobcdc/test_ob_cdc_part_trans_resolver.cpp index 4b621825be..8b6c69c21a 100644 --- a/unittest/libobcdc/test_ob_cdc_part_trans_resolver.cpp +++ b/unittest/libobcdc/test_ob_cdc_part_trans_resolver.cpp @@ -63,7 +63,7 @@ using namespace logfetcher; ObLogPartTransResolverFactory resolver_factory; \ ObLogTransTaskPool task_pool; \ EXPECT_EQ(OB_SUCCESS, fifo_allocator.init(16 * _G_, 16 * _M_, OB_MALLOC_NORMAL_BLOCK_SIZE)); \ - EXPECT_EQ(OB_SUCCESS, 
task_pool.init(&fifo_allocator, PREALLOC_POOL_SIZE, TRANS_TASK_PAGE_SIZE, true, PREALLOC_PAGE_COUNT)); \ + EXPECT_EQ(OB_SUCCESS, task_pool.init(&fifo_allocator, PREALLOC_POOL_SIZE, true, PREALLOC_PAGE_COUNT)); \ ObLogEntryTaskPool log_entry_task_pool; \ EXPECT_EQ(OB_SUCCESS, log_entry_task_pool.init(10/* fixed_log_entry_task_count */)); \ MockFetcherDispatcher fetcher_dispatcher; \ @@ -132,7 +132,6 @@ namespace unittest // Task Pool static const int64_t PREALLOC_POOL_SIZE = 10 * 1024; -static const int64_t TRANS_TASK_PAGE_SIZE = 1024; static const int64_t TRANS_TASK_BLOCK_SIZE = 4 * 1024 *1024; static const int64_t PREALLOC_PAGE_COUNT = 1024; @@ -374,8 +373,9 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq2_miss) EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_miss_tx_log(log_entry, lsn, tsi, new_miss_log)); EXPECT_EQ(0, new_miss_log.get_total_misslog_cnt()); - missing_info.set_need_reconsume_commit_log_entry(); - EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry2, lsn2, missing_info, tsi, stop_flag)); + IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; + reconsume_miss_info.set_reconsuming(); + EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry2, lsn2, reconsume_miss_info, tsi, stop_flag)); DESTROY_OBLOG_INSTANCE(); } @@ -425,7 +425,7 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq3_miss) EXPECT_EQ(0, new_miss_log.get_total_misslog_cnt()); EXPECT_TRUE(missing_info.need_reconsume_commit_log_entry()); IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; - reconsume_miss_info.set_need_reconsume_commit_log_entry(); + reconsume_miss_info.set_reconsuming(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry2, lsn2, reconsume_miss_info, tsi, stop_flag)); EXPECT_EQ(0, reconsume_miss_info.get_total_misslog_cnt()); @@ -483,7 +483,7 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq4_miss_1) new_miss_log.set_resolving_miss_log(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_miss_tx_log(log_entry, lsn, tsi, new_miss_log)); 
IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; - reconsume_miss_info.set_need_reconsume_commit_log_entry(); + reconsume_miss_info.set_reconsuming(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry2, lsn2, reconsume_miss_info, tsi, stop_flag)); missing_info.reset(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry3, lsn3, missing_info, tsi, stop_flag)); @@ -524,7 +524,7 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq4_miss_2) EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_miss_tx_log(log_entry, lsn, tsi, new_miss_log_2)); IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; - reconsume_miss_info.set_need_reconsume_commit_log_entry(); + reconsume_miss_info.set_reconsuming(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry3, lsn3, reconsume_miss_info, tsi, stop_flag)); @@ -582,9 +582,6 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq5_miss) IObCDCPartTransResolver::MissingLogInfo new_miss_log; new_miss_log.set_resolving_miss_log(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_miss_tx_log(log_entry, lsn, tsi, new_miss_log)); - IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; - reconsume_miss_info.set_need_reconsume_commit_log_entry(); - EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry2, lsn2, reconsume_miss_info, tsi, stop_flag)); missing_info.reset(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry3, lsn3, missing_info, tsi, stop_flag)); DESTROY_OBLOG_INSTANCE(); @@ -651,7 +648,7 @@ TEST(ObCDCPartTransResolver, test_sp_tx_seq6_miss) EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_miss_tx_log(log_entry2, lsn2, tsi, new_miss_log)); IObCDCPartTransResolver::MissingLogInfo reconsume_miss_info; - reconsume_miss_info.set_need_reconsume_commit_log_entry(); + reconsume_miss_info.set_reconsuming(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry3, lsn3, reconsume_miss_info, tsi, stop_flag)); @@ -746,7 +743,7 @@ TEST(ObCDCPartTransResolver, test_sp_tx_dist_miss2) EXPECT_EQ(OB_SUCCESS, 
ls_fetch_ctx->read_miss_tx_log(log_entry, lsn, tsi, new_miss_log)); EXPECT_EQ(0, new_miss_log.get_total_misslog_cnt()); missing_info.reset(); - missing_info.set_need_reconsume_commit_log_entry(); + missing_info.set_reconsuming(); EXPECT_EQ(OB_SUCCESS, ls_fetch_ctx->read_log(log_entry3, lsn3, missing_info, tsi, stop_flag)); DESTROY_OBLOG_INSTANCE();