From 51ee0c55b2b45289b79cd22dad662a852c5795af Mon Sep 17 00:00:00 2001 From: obdev Date: Thu, 15 Jul 2021 18:27:13 +0800 Subject: [PATCH] cherry-pick from 3_1_x_release to 3.1_opensource_release --- deps/oblib/src/lib/ob_errno.h | 3 +- deps/oblib/src/lib/restore/CMakeLists.txt | 26 ++-- deps/oblib/src/lib/restore/ob_storage.cpp | 67 ++++++++++ deps/oblib/src/lib/restore/ob_storage.h | 10 +- .../oblib/src/lib/restore/ob_storage_file.cpp | 10 +- .../src/lib/restore/ob_storage_oss_base.cpp | 1 + .../src/lib/statistic_event/ob_stat_event.h | 50 ++++++++ src/archive/ob_archive_entry_iterator.cpp | 14 ++- src/archive/ob_archive_log_file_store.cpp | 23 ++-- src/archive/ob_archive_restore_engine.cpp | 4 +- src/archive/ob_archive_restore_engine.h | 2 +- src/archive/ob_archive_sender.cpp | 116 +++++++++--------- src/archive/ob_archive_sender.h | 4 +- src/archive/ob_archive_util.cpp | 6 + src/archive/ob_archive_util.h | 1 + src/share/ob_errno.cpp | 10 +- src/share/ob_errno.def | 1 + src/share/ob_errno.h | 4 +- src/storage/ob_partition_group.cpp | 2 +- 19 files changed, 246 insertions(+), 108 deletions(-) diff --git a/deps/oblib/src/lib/ob_errno.h b/deps/oblib/src/lib/ob_errno.h index 36d98a3342..efebca8110 100644 --- a/deps/oblib/src/lib/ob_errno.h +++ b/deps/oblib/src/lib/ob_errno.h @@ -228,12 +228,11 @@ constexpr int OB_INIT_MD5_ERROR = -9004; constexpr int OB_OUT_OF_ELEMENT = -9005; constexpr int OB_UPDATE_MD5_ERROR = -9006; constexpr int OB_FILE_LENGTH_INVALID = -9007; -constexpr int OB_COS_ERROR = -9008; constexpr int OB_BACKUP_FILE_NOT_EXIST = -9011; constexpr int OB_INVALID_BACKUP_DEST = -9026; +constexpr int OB_IO_LIMIT = -9061; constexpr int OB_MAX_RAISE_APPLICATION_ERROR = -20000; constexpr int OB_MIN_RAISE_APPLICATION_ERROR = -20999; - } // namespace common using namespace common; // maybe someone can fix } // namespace oceanbase diff --git a/deps/oblib/src/lib/restore/CMakeLists.txt b/deps/oblib/src/lib/restore/CMakeLists.txt index 766c2b7c8e..cb57ff20a5 100644 --- a/deps/oblib/src/lib/restore/CMakeLists.txt +++ b/deps/oblib/src/lib/restore/CMakeLists.txt @@ -1,5 +1,5 @@ -ob_set_subtarget(oblib_lib_restore common - ob_storage.cpp +ob_set_subtarget(oblib_lib_restore common + ob_storage.cpp ob_storage_file.cpp # ob_storage_oss_base.cpp ob_storage_path.cpp) @@ -9,16 +9,16 @@ ob_lib_add_target(oblib_lib_restore) #add_library(oss INTERFACE) #target_include_directories(oss -# INTERFACE -# ${DEP_DIR}/include/apr-1 -# ${DEP_DIR}/include/oss_c_sdk -# ${DEP_DIR}/include/mxml) - +#INTERFACE +#${DEP_DIR } / include / apr - 1 +#${DEP_DIR } / include / oss_c_sdk +#${DEP_DIR } / include / mxml) + #target_link_libraries(oss -# INTERFACE -# ${DEP_DIR}/lib/liboss_c_sdk_static.a -# ${DEP_DIR}/lib/libaprutil-1.a -# ${DEP_DIR}/lib/libapr-1.a -# ${DEP_DIR}/lib/libmxml.a) - +#INTERFACE +#${DEP_DIR } / lib / liboss_c_sdk_static.a +#${DEP_DIR } / lib / libaprutil - 1.a +#${DEP_DIR } / lib / libapr - 1.a +#${DEP_DIR } / lib / libmxml.a) + #target_link_libraries(restore PUBLIC oss oblib_base) diff --git a/deps/oblib/src/lib/restore/ob_storage.cpp b/deps/oblib/src/lib/restore/ob_storage.cpp index 321a837706..2500585725 100644 --- a/deps/oblib/src/lib/restore/ob_storage.cpp +++ b/deps/oblib/src/lib/restore/ob_storage.cpp @@ -13,6 +13,7 @@ #include "ob_storage.h" #include "lib/restore/ob_i_storage.h" #include "lib/utility/ob_tracepoint.h" +#include "lib/stat/ob_diagnose_info.h" namespace oceanbase { namespace common { @@ -202,6 +203,12 @@ int ObStorageUtil::del_file(const common::ObString& uri, const common::ObString& } #endif print_access_storage_log("del_file", uri, start_ts); + + if (OB_FAIL(ret)) { + EVENT_INC(ObStatEventIds::BACKUP_IO_DEL_FAIL_COUNT); + } + EVENT_INC(ObStatEventIds::BACKUP_DELETE_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_DELETE_DELAY, ObTimeUtility::current_time() - start_ts); return ret; } @@ -409,6 +416,13 @@ int ObStorageUtil::list_files(const common::ObString& uri, const common::ObStrin } } print_access_storage_log("list_files", uri, start_ts, 0); + + if (OB_FAIL(ret)) { + EVENT_INC(ObStatEventIds::BACKUP_IO_LS_FAIL_COUNT); + } + + EVENT_INC(ObStatEventIds::BACKUP_IO_LS_COUNT); + return ret; } @@ -440,6 +454,8 @@ int ObStorageUtil::write_single_file( need_retry = true; ret = OB_SUCCESS; } + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_BYTES, size); } } } @@ -449,6 +465,14 @@ int ObStorageUtil::write_single_file( } #endif print_access_storage_log("write_single_file", uri, start_ts, size); + + if (OB_FAIL(ret)) { + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_FAIL_COUNT); + } + + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } @@ -685,6 +709,8 @@ int ObStorageReader::pread(char* buf, const int64_t buf_size, int64_t offset, in #ifdef ERRSIM ret = E(EventTable::EN_BACKUP_IO_READER_PREAD) OB_SUCCESS; #endif + + const int64_t start_ts = ObTimeUtility::current_time(); if (OB_FAIL(ret)) { } else if (OB_ISNULL(reader_)) { ret = OB_NOT_INIT; @@ -693,8 +719,15 @@ int ObStorageReader::pread(char* buf, const int64_t buf_size, int64_t offset, in ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid args", K(ret), KP(buf), K(offset), K(file_length_)); } else if (OB_FAIL(reader_->pread(buf, buf_size, offset, read_size))) { + EVENT_INC(ObStatEventIds::BACKUP_IO_READ_FAIL_COUNT); STORAGE_LOG(WARN, "failed to read file", K(ret)); + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_READ_BYTES, read_size); } + + EVENT_INC(ObStatEventIds::BACKUP_IO_READ_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_READ_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } @@ -783,13 +816,22 @@ int ObStorageWriter::write(const char* buf, const int64_t size) #ifdef ERRSIM ret = E(EventTable::EN_BACKUP_IO_WRITE_WRITE) OB_SUCCESS; #endif + + const int64_t start_ts = ObTimeUtility::current_time(); if (OB_FAIL(ret)) { } else if (OB_ISNULL(writer_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not opened", K(ret)); } else if (OB_FAIL(writer_->write(buf, size))) { + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_FAIL_COUNT); STORAGE_LOG(WARN, "failed to write", K(ret)); + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_BYTES, size); } + + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } @@ -936,13 +978,22 @@ int ObStorageAppender::write(const char* buf, const int64_t size) #ifdef ERRSIM ret = E(EventTable::EN_BACKUP_IO_APPENDER_WRITE) OB_SUCCESS; #endif + + const int64_t start_ts = ObTimeUtility::current_time(); if (OB_FAIL(ret)) { } else if (OB_ISNULL(appender_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not opened", K(ret)); } else if (OB_FAIL(appender_->write(buf, size))) { + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_FAIL_COUNT); STORAGE_LOG(WARN, "failed to write", K(ret)); + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_BYTES, size); } + + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } @@ -991,11 +1042,19 @@ int ObStorageMetaWrapper::get(const common::ObString& uri, const common::ObStrin int ret = OB_SUCCESS; ObIStorageMetaWrapper* meta = NULL; + const int64_t start_ts = ObTimeUtility::current_time(); if (OB_FAIL(get_meta(uri, meta))) { STORAGE_LOG(WARN, "failed to get meta", K(ret), K(uri)); } else if (OB_FAIL(meta->get(uri, storage_info, buf, buf_size, read_size))) { + EVENT_INC(ObStatEventIds::BACKUP_IO_READ_FAIL_COUNT); STORAGE_LOG(WARN, "failed to meta get", K(ret), K(uri)); + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_READ_BYTES, read_size); } + + EVENT_INC(ObStatEventIds::BACKUP_IO_READ_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_READ_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } @@ -1005,11 +1064,19 @@ int ObStorageMetaWrapper::set( int ret = OB_SUCCESS; ObIStorageMetaWrapper* meta = NULL; + const int64_t start_ts = ObTimeUtility::current_time(); if (OB_FAIL(get_meta(uri, meta))) { STORAGE_LOG(WARN, "failed to get meta", K(ret), K(uri)); } else if (OB_FAIL(meta->set(uri, storage_info, buf, size))) { + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_FAIL_COUNT); STORAGE_LOG(WARN, "failed to meta set", K(ret), K(uri)); + } else { + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_BYTES, size); } + + EVENT_INC(ObStatEventIds::BACKUP_IO_WRITE_COUNT); + EVENT_ADD(ObStatEventIds::BACKUP_IO_WRITE_DELAY, ObTimeUtility::current_time() - start_ts); + return ret; } diff --git a/deps/oblib/src/lib/restore/ob_storage.h b/deps/oblib/src/lib/restore/ob_storage.h index 50b24a4a3b..10b65fdc46 100644 --- a/deps/oblib/src/lib/restore/ob_storage.h +++ b/deps/oblib/src/lib/restore/ob_storage.h @@ -30,23 +30,21 @@ int get_storage_type_from_name(const char* type_str, ObStorageType& type); const char* get_storage_type_str(const ObStorageType& type); enum ObAppendStrategy { - // Each write is a PUT operation that will overlay the old object - OB_APPEND_USE_SIMPLE_PUT = 0, // Each write will be done by the following operations: // 1. read the whole object // 2. write with previously read data as a newer object - OB_APPEND_USE_OVERRITE = 1, + OB_APPEND_USE_OVERRITE = 0, // Append data to the tail of the object with specific offset. The write // will be done only if actual tail is equal to the input offset. Otherwise, // return failed. - OB_APPEND_USE_APPEND = 2, + OB_APPEND_USE_APPEND = 1, // In this case, the object is a logical one which is actually composed of several // pythysical subobject. A number will be given for each write to format the name of // the subobject combined with the logical object name. - OB_APPEND_USE_SLICE_PUT = 3, + OB_APPEND_USE_SLICE_PUT = 2, // In this case, we will use multi-part upload provided by object storage, eg S3, to write // for the object. Note that the object is invisible before all parts are written. - OB_APPEND_USE_MULTI_PART_UPLOAD = 4, + OB_APPEND_USE_MULTI_PART_UPLOAD = 3, OB_APPEND_STRATEGY_TYPE }; diff --git a/deps/oblib/src/lib/restore/ob_storage_file.cpp b/deps/oblib/src/lib/restore/ob_storage_file.cpp index 0bf0f47d5c..35c127c735 100644 --- a/deps/oblib/src/lib/restore/ob_storage_file.cpp +++ b/deps/oblib/src/lib/restore/ob_storage_file.cpp @@ -350,7 +350,7 @@ int ObStorageFileUtil::list_files(const common::ObString& uri, const common::ObS STORAGE_LOG(WARN, "failed to fill path", K(ret), K(uri)); } else if (OB_ISNULL(open_dir = ::opendir(dir_path))) { if (ENOENT != errno) { - ret = OB_FILE_NOT_OPENED; + ret = OB_IO_ERROR; OB_LOG(WARN, "fail to open dir", K(ret), K(dir_path), K(strerror_r(errno, errno_buf, sizeof(errno_buf)))); } } @@ -418,7 +418,7 @@ int ObStorageFileUtil::del_dir(const common::ObString& uri, const common::ObStri STORAGE_LOG(WARN, "path is not to dir", K(ret), K(dir_path)); } else if (OB_ISNULL(open_dir = ::opendir(dir_path))) { if (ENOENT != errno) { - ret = OB_FILE_NOT_OPENED; + ret = OB_IO_ERROR; OB_LOG(WARN, "fail to open dir", K(ret), K(dir_path), K(strerror_r(errno, errno_buf, sizeof(errno_buf)))); } } else { @@ -468,7 +468,7 @@ int ObStorageFileUtil::get_pkeys_from_dir( STORAGE_LOG(WARN, "failed to fill path", K(ret), K(uri)); } else if (OB_ISNULL(open_dir = ::opendir(dir_path))) { if (ENOENT != errno) { - ret = OB_FILE_NOT_OPENED; + ret = OB_IO_ERROR; OB_LOG(WARN, "fail to open dir", K(ret), K(dir_path), K(strerror_r(errno, errno_buf, sizeof(errno_buf)))); } } @@ -476,7 +476,7 @@ int ObStorageFileUtil::get_pkeys_from_dir( ObPartitionKey pkey; // tmp file name format tableid_partitionid.tmp.timestamp const char* tmp_file_mark = ".tmp."; - while (OB_SUCC(ret)) { + while (OB_SUCC(ret) && NULL != open_dir) { pkey.reset(); if (0 != ::readdir_r(open_dir, &entry, &result)) { ret = OB_IO_ERROR; @@ -658,7 +658,7 @@ int ObStorageFileUtil::delete_tmp_files(const common::ObString& uri, const commo STORAGE_LOG(WARN, "path is not to dir", K(ret), K(dir_path)); } else if (OB_ISNULL(open_dir = ::opendir(dir_path))) { if (ENOENT != errno) { - ret = OB_FILE_NOT_OPENED; + ret = OB_IO_ERROR; OB_LOG(WARN, "fail to open dir", K(ret), K(dir_path), "errno", strerror_r(errno, errno_buf, sizeof(errno_buf))); } } else { diff --git a/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp b/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp index 891635fce1..73b86ff811 100644 --- a/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp +++ b/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp @@ -428,6 +428,7 @@ int ObStorageOssBase::init_oss_options(aos_pool_t*& aos_pool, oss_request_option // time out To prevent overtime, reduce the minimum tolerable rate and increase the maximum tolerable time Control // the minimum rate that can be tolerated, the default is 1024, which is 1K oss_option->ctl->options->speed_limit = // 256; The maximum time that the control can tolerate, the default is 15 seconds + oss_option->ctl->options->speed_limit = 16000; oss_option->ctl->options->speed_time = 60; } return ret; diff --git a/deps/oblib/src/lib/statistic_event/ob_stat_event.h b/deps/oblib/src/lib/statistic_event/ob_stat_event.h index 6e28942fba..513ad8fe66 100644 --- a/deps/oblib/src/lib/statistic_event/ob_stat_event.h +++ b/deps/oblib/src/lib/statistic_event/ob_stat_event.h @@ -533,6 +533,56 @@ STAT_EVENT_ADD_DEF(BANDWIDTH_OUT_SLEEP_US, "bandwidth out sleep us", ObStatClass STAT_EVENT_ADD_DEF(MEMSTORE_WRITE_LOCK_WAIT_TIMEOUT_COUNT, "memstore write lock wait timeout count", ObStatClassIds::STORAGE, "memstore write wait timeout count", 60083, true, true) +// backup & restore +STAT_EVENT_ADD_DEF( + BACKUP_IO_READ_COUNT, "backup io read count", ObStatClassIds::STORAGE, "backup io read count", 69000, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_IO_READ_BYTES, "backup io read bytes", ObStatClassIds::STORAGE, "backup io read bytes", 69001, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_IO_WRITE_COUNT, "backup io write count", ObStatClassIds::STORAGE, "backup io write count", 69002, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_IO_WRITE_BYTES, "backup io write bytes", ObStatClassIds::STORAGE, "backup io write bytes", 69003, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_READ_COUNT, "cos io read count", ObStatClassIds::STORAGE, "cos io read count", 69004, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_READ_BYTES, "cos io read bytes", ObStatClassIds::STORAGE, "cos io read bytes", 69005, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_WRITE_COUNT, "cos io write count", ObStatClassIds::STORAGE, "cos io write count", 69006, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_WRITE_BYTES, "cos io write bytes", ObStatClassIds::STORAGE, "cos io write bytes", 69007, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_LS_COUNT, "cos io list count", ObStatClassIds::STORAGE, "cos io list count", 69008, true, true) +STAT_EVENT_ADD_DEF(COS_IO_LS_LIMIT_COUNT, "cos list io limit count", ObStatClassIds::STORAGE, "cos list io limit count", + 69009, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_DELETE_COUNT, "backup delete count", ObStatClassIds::STORAGE, "backup delete count", 69010, true, true) +STAT_EVENT_ADD_DEF(COS_DELETE_COUNT, "cos delete count", ObStatClassIds::STORAGE, "cos delete count", 69011, true, true) + +STAT_EVENT_ADD_DEF( + BACKUP_IO_READ_DELAY, "backup io read delay", ObStatClassIds::STORAGE, "backup io read delay", 60012, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_IO_WRITE_DELAY, "backup io write delay", ObStatClassIds::STORAGE, "backup io write delay", 60013, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_READ_DELAY, "cos io read delay", ObStatClassIds::STORAGE, "cos io read delay", 60014, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_WRITE_DELAY, "cos io write delay", ObStatClassIds::STORAGE, "cos io write delay", 60015, true, true) +STAT_EVENT_ADD_DEF( + COS_IO_LS_DELAY, "cos io list delay", ObStatClassIds::STORAGE, "cos io list delay", 69016, true, true) +STAT_EVENT_ADD_DEF( + BACKUP_DELETE_DELAY, "backup delete delay", ObStatClassIds::STORAGE, "backup delete delay", 69017, true, true) +STAT_EVENT_ADD_DEF(COS_DELETE_DELAY, "cos delete delay", ObStatClassIds::STORAGE, "cos delete delay", 69018, true, true) + +STAT_EVENT_ADD_DEF( + BACKUP_IO_LS_COUNT, "backup io list count", ObStatClassIds::STORAGE, "backup io list count", 69019, true, true) +STAT_EVENT_ADD_DEF(BACKUP_IO_READ_FAIL_COUNT, "backup io read failed count", ObStatClassIds::STORAGE, + "backup io read failed count", 69020, true, true) +STAT_EVENT_ADD_DEF(BACKUP_IO_WRITE_FAIL_COUNT, "backup io write failed count", ObStatClassIds::STORAGE, + "backup io write failed count", 69021, true, true) +STAT_EVENT_ADD_DEF(BACKUP_IO_DEL_FAIL_COUNT, "backup io delete failed count", ObStatClassIds::STORAGE, + "backup io delete failed count", 69022, true, true) +STAT_EVENT_ADD_DEF(BACKUP_IO_LS_FAIL_COUNT, "backup io list failed count", ObStatClassIds::STORAGE, + "backup io list failed count", 69023, true, true) + // DEBUG STAT_EVENT_ADD_DEF( REFRESH_SCHEMA_COUNT, "refresh schema count", ObStatClassIds::DEBUG, "refresh schema count", 70000, true, true) diff --git a/src/archive/ob_archive_entry_iterator.cpp b/src/archive/ob_archive_entry_iterator.cpp index 9af2fa16b1..a21d57efc0 100644 --- a/src/archive/ob_archive_entry_iterator.cpp +++ b/src/archive/ob_archive_entry_iterator.cpp @@ -16,6 +16,7 @@ #include "ob_log_archive_struct.h" #include "clog/ob_log_entry.h" #include "observer/ob_server_struct.h" +#include "ob_archive_util.h" // is_io_error namespace oceanbase { using namespace common; @@ -202,10 +203,15 @@ int ObArchiveEntryIterator::prepare_buffer_() ObReadRes read_res; const int64_t begin_time = ObClockGenerator::getClock(); - if (OB_FAIL(file_store_->read_data_direct(param, rbuf_, read_res)) && OB_BACKUP_FILE_NOT_EXIST != ret) { - ARCHIVE_LOG(ERROR, "failed to read_data_direct", KR(ret), K(param), KPC(this)); - } else if (0 == read_res.data_len_ || OB_BACKUP_FILE_NOT_EXIST == ret) { - // To end + if (OB_FAIL(file_store_->read_data_direct(param, rbuf_, read_res))) { + if (OB_BACKUP_FILE_NOT_EXIST == ret) { + ret = OB_ITER_END; + } else if (is_io_error(ret) || OB_ALLOCATE_MEMORY_FAILED == ret || OB_IO_LIMIT == ret) { + // do nothing + } else { + ARCHIVE_LOG(ERROR, "failed to read_data_direct", KR(ret), K(param), KPC(this)); + } + } else if (0 == read_res.data_len_) { ret = OB_ITER_END; } else if (OB_ISNULL(read_res.buf_) || OB_UNLIKELY(0 > read_res.data_len_)) { ret = OB_ERR_UNEXPECTED; diff --git a/src/archive/ob_archive_log_file_store.cpp b/src/archive/ob_archive_log_file_store.cpp index c70e31e7db..b08a1ee167 100644 --- a/src/archive/ob_archive_log_file_store.cpp +++ b/src/archive/ob_archive_log_file_store.cpp @@ -128,7 +128,6 @@ int ObArchiveLogFileStore::locate_file_by_log_id(const ObPGKey& pg_key, const ui } else { ARCHIVE_LOG(WARN, "search_log_id_in_index_file_ fail", K(ret), K(pg_key), K(log_id)); } - } else if (max_valid_index_info.min_log_id_ == log_id) { } else { ARCHIVE_LOG(INFO, "locate_file_by_log_id succ", K(pg_key), K(log_id), K(file_id)); } @@ -140,24 +139,25 @@ int ObArchiveLogFileStore::locate_file_by_log_id(const ObPGKey& pg_key, const ui bool exist_file_unrecord = false; uint64_t max_file_id = 0; if (OB_SUCCESS != - (tmp_ret = get_max_data_file_unrecord_(pg_key, storage_info, max_file_id, exist_file_unrecord)) && - OB_ERR_UNEXPECTED != tmp_ret) { - ARCHIVE_LOG(WARN, "get_max_data_file_unrecord_ fail", K(tmp_ret), K(pg_key), K(storage_info)); - } else if (OB_ERR_UNEXPECTED == tmp_ret) { + (tmp_ret = get_max_data_file_unrecord_(pg_key, storage_info, max_file_id, exist_file_unrecord))) { + ARCHIVE_LOG(WARN, "get_max_data_file_unrecord_ fail", KR(tmp_ret), K(pg_key), K(storage_info)); // overwrite ret ret = tmp_ret; - ARCHIVE_LOG(ERROR, "get_max_data_file_unrecord_ fail", K(ret), K(pg_key)); } else if (exist_file_unrecord) { file_id = max_file_id; ret = OB_SUCCESS; - ARCHIVE_LOG(INFO, "no valid index info, return max data file id", K(ret), K(pg_key), K(log_id), K(file_id)); + ARCHIVE_LOG(INFO, "no valid index info, return max data file id", KR(ret), K(pg_key), K(log_id), K(file_id)); } } // 2. min log id in index info > the log id else if (max_valid_index_info.min_log_id_ > log_id) { ret = OB_ERR_UNEXPECTED; - ARCHIVE_LOG( - ERROR, "max_valid_index_info min_log_id_ bigger than log_id", K(pg_key), K(log_id), K(max_valid_index_info)); + ARCHIVE_LOG(ERROR, + "max_valid_index_info min_log_id_ bigger than log_id", + KR(ret), + K(pg_key), + K(log_id), + K(max_valid_index_info)); } // 3. max log id in index info < the log id else if (max_valid_index_info.max_log_id_ < log_id) { @@ -165,9 +165,8 @@ int ObArchiveLogFileStore::locate_file_by_log_id(const ObPGKey& pg_key, const ui file_id = max_valid_index_info.data_file_id_; if (max_valid_index_info.max_log_id_ + 1 < log_id) { int tmp_ret = OB_SUCCESS; - if (OB_SUCCESS != - (tmp_ret = check_file_exist_(pg_key, max_valid_index_info.data_file_id_ + 1, LOG_ARCHIVE_FILE_TYPE_DATA))) { - ARCHIVE_LOG(WARN, "check_file_exist_ fail", KR(ret), K(pg_key), K(max_valid_index_info)); + if (OB_SUCCESS != (tmp_ret = check_file_exist_(pg_key, file_id + 1, LOG_ARCHIVE_FILE_TYPE_DATA))) { + ARCHIVE_LOG(WARN, "check_file_exist_ fail", KR(tmp_ret), K(pg_key), K(max_valid_index_info)); } else { file_id = max_valid_index_info.data_file_id_ + 1; } diff --git a/src/archive/ob_archive_restore_engine.cpp b/src/archive/ob_archive_restore_engine.cpp index 3baa19f381..8e8319223f 100644 --- a/src/archive/ob_archive_restore_engine.cpp +++ b/src/archive/ob_archive_restore_engine.cpp @@ -459,7 +459,7 @@ void ObArchiveRestoreEngine::handle(void* task) if (is_io_fail_ret_code_(ret)) { fetch_task->inc_io_fail_cnt(); if (fetch_task->get_io_fail_cnt() > MAX_FETCH_LOG_IO_FAIL_CNT) { - encount_fatal_err = true; + //encount_fatal_err = true; LOG_ERROR("io fail too many times", "task", *fetch_task, KR(ret)); } } @@ -806,7 +806,7 @@ int ObArchiveRestoreEngine::process_normal_clog_(const clog::ObLogEntry& log_ent bool ObArchiveRestoreEngine::need_retry_ret_code_(const int ret) { - return (OB_EAGAIN == ret || OB_OSS_ERROR == ret || OB_IO_ERROR == ret); + return (OB_EAGAIN == ret || OB_OSS_ERROR == ret || OB_IO_ERROR == ret || OB_ALLOCATE_MEMORY_FAILED == ret || OB_IO_LIMIT == ret); } bool ObArchiveRestoreEngine::is_io_fail_ret_code_(const int ret) diff --git a/src/archive/ob_archive_restore_engine.h b/src/archive/ob_archive_restore_engine.h index e876df7759..05eb755853 100644 --- a/src/archive/ob_archive_restore_engine.h +++ b/src/archive/ob_archive_restore_engine.h @@ -356,7 +356,7 @@ private: private: typedef common::ObLinkHashMap TenantRestoreMetaMap; - const int64_t MAX_FETCH_LOG_IO_FAIL_CNT = 5; + const int64_t MAX_FETCH_LOG_IO_FAIL_CNT = 5 * 100; const int64_t TASK_NUM_LIMIT = 1000000; const int64_t ACCESS_LOG_FILE_STORE_TIMEOUT = 3 * 1000 * 1000L; const int64_t MINI_MODE_RESTORE_THREAD_NUM = 1; diff --git a/src/archive/ob_archive_sender.cpp b/src/archive/ob_archive_sender.cpp index 161f2a795b..01841f844e 100644 --- a/src/archive/ob_archive_sender.cpp +++ b/src/archive/ob_archive_sender.cpp @@ -314,8 +314,8 @@ int ObArchiveSender::handle_task_list(ObArchiveTaskStatus* status) } } - // if io error or alloc memory fail or io limit, sleep 100ms - if (is_io_error_(ret) || OB_ALLOCATE_MEMORY_FAILED == ret) { + // if memory or IO failed, sleep 100ms + if (is_io_error(ret) || OB_ALLOCATE_MEMORY_FAILED == ret || OB_IO_LIMIT == ret) { usleep(100 * 1000L); } @@ -902,28 +902,30 @@ int ObArchiveSender::do_archive_log_(ObPGArchiveTask& pg_archive_task, const int ret = OB_IO_ERROR; } - if (is_io_error_(ret)) { + if (is_io_error(ret)) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = pg_archive_task.set_file_force_switch(epoch, incarnation_, round, LOG_ARCHIVE_FILE_TYPE_DATA))) { ARCHIVE_LOG(WARN, "set_file_force_switch fail", KR(tmp_ret), K(pg_archive_task)); ret = tmp_ret; - // rewrite ret } else { ARCHIVE_LOG(INFO, "fake archive data corrupted", KR(ret), K(pg_archive_task)); } } } } +} #endif - // 7. write data file - if (OB_SUCC(ret) && !has_inject_fault) { - ObString storage_info(archive_round_mgr_->storage_info_); - ObArchiveIO archive_io(storage_info); - if (OB_FAIL(archive_io.push_log(file_path, buf_size, buffer, need_switch_file, compatible, is_data_file, epoch))) { - ARCHIVE_LOG(WARN, "push_log fail", KR(ret), K(pg_archive_task)); +// 7. write data file +if (OB_SUCC(ret) && !has_inject_fault) { + ObString storage_info(archive_round_mgr_->storage_info_); + ObArchiveIO archive_io(storage_info); + if (OB_FAIL(archive_io.push_log(file_path, buf_size, buffer, need_switch_file, compatible, is_data_file, epoch))) { + ARCHIVE_LOG(WARN, "push_log fail", KR(ret), K(pg_archive_task)); + // Do not switch file if returned error is not IO error. + if (is_io_error(ret)) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = pg_archive_task.set_file_force_switch(epoch, incarnation, round, LOG_ARCHIVE_FILE_TYPE_DATA))) { @@ -932,34 +934,35 @@ int ObArchiveSender::do_archive_log_(ObPGArchiveTask& pg_archive_task, const int } } } +} - if (NULL != buffer) { - ob_archive_free(buffer); +if (NULL != buffer) { + ob_archive_free(buffer); +} + +// 8. record min_log/offset +if (OB_SUCC(ret)) { + ObArchiveSendTask* task = array[0]; + uint64_t min_log_id = task->start_log_id_; + int64_t min_log_ts = task->start_log_ts_; + if (OB_ARCHIVE_TASK_TYPE_KICKOFF == task->task_type_) { + min_log_id = task->start_log_id_ - 1; + } else if (OB_ARCHIVE_TASK_TYPE_CHECKPOINT == task->task_type_) { + // need send only if non-checkpoint log is included + min_log_id = task->start_log_id_ + 1; } - - // 8. record min_log/offset - if (OB_SUCC(ret)) { - ObArchiveSendTask* task = array[0]; - uint64_t min_log_id = task->start_log_id_; - int64_t min_log_ts = task->start_log_ts_; - if (OB_ARCHIVE_TASK_TYPE_KICKOFF == task->task_type_) { - min_log_id = task->start_log_id_ - 1; - } else if (OB_ARCHIVE_TASK_TYPE_CHECKPOINT == task->task_type_) { - // need send only if non-checkpoint log is included - min_log_id = task->start_log_id_ + 1; - } - if (OB_FAIL(update_data_file_info_(epoch, incarnation, round, min_log_id, min_log_ts, pg_archive_task, buf_size))) { - ARCHIVE_LOG(WARN, "update_data_file_info_ fail", KR(ret), K(pg_archive_task)); - } + if (OB_FAIL(update_data_file_info_(epoch, incarnation, round, min_log_id, min_log_ts, pg_archive_task, buf_size))) { + ARCHIVE_LOG(WARN, "update_data_file_info_ fail", KR(ret), K(pg_archive_task)); } +} - // 9. statistic - if (OB_SUCC(ret)) { - const int64_t cost_ts = ObTimeUtility::current_time() - start_ts; - statistic(array, cost_ts); - } +// 9. statistic +if (OB_SUCC(ret)) { + const int64_t cost_ts = ObTimeUtility::current_time() - start_ts; + statistic(array, cost_ts); +} - return ret; +return ret; } int ObArchiveSender::record_index_info( @@ -1085,40 +1088,39 @@ int ObArchiveSender::record_index_info_(ObPGArchiveTask& pg_archive_task, const ret = OB_IO_ERROR; } - if (is_io_error_(ret)) { + if (is_io_error(ret)) { int tmp_ret = OB_SUCCESS; if (OB_SUCCESS != (tmp_ret = pg_archive_task.set_file_force_switch( epoch, incarnation_, round, LOG_ARCHIVE_FILE_TYPE_INDEX))) { ARCHIVE_LOG(WARN, "set_file_force_switch fail", KR(tmp_ret), K(pg_archive_task)); ret = tmp_ret; - // rewrite ret } else { ARCHIVE_LOG(INFO, "fake archive index corrupted", KR(ret), K(pg_archive_task)); } } } } +} #endif - // 4. write index file - if (OB_SUCC(ret) && !has_inject_fault) { - ObString storage_info(archive_round_mgr_->storage_info_); - ObArchiveIO archive_io(storage_info); - if (OB_FAIL( - archive_io.push_log(file_path, buffer_len, buffer, need_switch_file, compatible, is_data_file, epoch))) { - ARCHIVE_LOG(WARN, "push_log fail", KR(ret), K(pg_archive_task)); +// 4. write index file +if (OB_SUCC(ret) && !has_inject_fault) { + ObString storage_info(archive_round_mgr_->storage_info_); + ObArchiveIO archive_io(storage_info); + if (OB_FAIL(archive_io.push_log(file_path, buffer_len, buffer, need_switch_file, compatible, is_data_file, epoch))) { + ARCHIVE_LOG(WARN, "push_log fail", KR(ret), K(pg_archive_task)); - if (is_io_error_(ret)) { - int tmp_ret = OB_SUCCESS; - if (OB_SUCCESS != (tmp_ret = pg_archive_task.set_file_force_switch( - epoch, incarnation_, round, LOG_ARCHIVE_FILE_TYPE_INDEX))) { - ARCHIVE_LOG(WARN, "set_file_force_switch fail", KR(tmp_ret), K(pg_archive_task)); - ret = tmp_ret; - } + if (is_io_error(ret)) { + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != + (tmp_ret = pg_archive_task.set_file_force_switch(epoch, incarnation_, round, LOG_ARCHIVE_FILE_TYPE_INDEX))) { + ARCHIVE_LOG(WARN, "set_file_force_switch fail", KR(tmp_ret), K(pg_archive_task)); + ret = tmp_ret; } } } +} - return ret; +return ret; } // check leader before switch file @@ -1235,8 +1237,8 @@ int ObArchiveSender::handle_archive_error_(const ObPGKey& pg_key, const int64_t int ret = OB_SUCCESS; bool io_error_trigger = false; - // statistic IO error - if (is_io_error_(ret_code)) { + // mark IO failed + if (is_io_error(ret_code)) { io_error_trigger = task_status.mark_io_error(); } @@ -1252,9 +1254,10 @@ int ObArchiveSender::handle_archive_error_(const ObPGKey& pg_key, const int64_t K(epoch), K(incarnation), K(round)); - } else if ((is_not_leader_error_(ret_code) || is_io_error_(ret_code) || OB_ALLOCATE_MEMORY_FAILED == ret_code) && + } else if ((is_not_leader_error_(ret_code) || is_io_error(ret_code) || OB_ALLOCATE_MEMORY_FAILED == ret_code || + OB_IO_LIMIT == ret_code) && !io_error_trigger) { - // not leader or IO error, skip it + // skip IO error and not leader error code ARCHIVE_LOG(WARN, "obsolete task or alloc memory fail or io error, skip it", KR(ret_code), @@ -1291,11 +1294,6 @@ int ObArchiveSender::handle_archive_error_(const ObPGKey& pg_key, const int64_t return ret; } -bool ObArchiveSender::is_io_error_(const int ret_code) -{ - return OB_IO_ERROR == ret_code || OB_OSS_ERROR == ret_code; -} - bool ObArchiveSender::is_not_leader_error_(const int ret_code) { return OB_LOG_ARCHIVE_LEADER_CHANGED == ret_code || OB_BACKUP_INFO_NOT_MATCH == ret_code || diff --git a/src/archive/ob_archive_sender.h b/src/archive/ob_archive_sender.h index e5e7764a49..2a9ff7ce33 100644 --- a/src/archive/ob_archive_sender.h +++ b/src/archive/ob_archive_sender.h @@ -71,7 +71,9 @@ public: int handle_task_list(ObArchiveTaskStatus* task_status); private: - bool is_io_error_(const int ret_code); + // void run1(); + // void do_thread_task_(); + // int handle_task_list_(ObArchiveSendTaskStatus &task_status); bool is_not_leader_error_(const int ret_code); int try_retire_task_status_(ObArchiveSendTaskStatus& task_status); diff --git a/src/archive/ob_archive_util.cpp b/src/archive/ob_archive_util.cpp index 930a27d834..855fdc0e49 100644 --- a/src/archive/ob_archive_util.cpp +++ b/src/archive/ob_archive_util.cpp @@ -81,5 +81,11 @@ bool is_valid_archive_compressor_type(const common::ObCompressorType compressor_ { return ((LZ4_COMPRESSOR == compressor_type) || (ZSTD_1_3_8_COMPRESSOR == compressor_type)); } + +bool is_io_error(const int ret_code) +{ + return OB_IO_ERROR == ret_code || OB_OSS_ERROR == ret_code; +} + } // namespace archive } // namespace oceanbase diff --git a/src/archive/ob_archive_util.h b/src/archive/ob_archive_util.h index fb481d842f..b32ea306b4 100644 --- a/src/archive/ob_archive_util.h +++ b/src/archive/ob_archive_util.h @@ -165,6 +165,7 @@ void ob_archive_free(void* ptr); int check_is_leader(const common::ObPGKey& pg_key, const int64_t epoch, bool& is_leader); bool is_valid_archive_compressor_type(const common::ObCompressorType compressor_type); +bool is_io_error(const int ret_code); } // namespace archive } // namespace oceanbase diff --git a/src/share/ob_errno.cpp b/src/share/ob_errno.cpp index dccf8fd11d..c505e1761a 100644 --- a/src/share/ob_errno.cpp +++ b/src/share/ob_errno.cpp @@ -6873,7 +6873,7 @@ static struct ObStrErrorInit { ORACLE_STR_USER_ERROR[-OB_ERR_BAD_FT_COLUMN] = "ORA-00600: internal error code, arguments: -5288, Column '%.*s' cannot be part of FULLTEXT index"; ERROR_NAME[-OB_ERR_KEY_DOES_NOT_EXISTS] = "OB_ERR_KEY_DOES_NOT_EXISTS"; - MYSQL_ERRNO[-OB_ERR_KEY_DOES_NOT_EXISTS] = ER_KEY_DOES_NOT_EXISTS; + MYSQL_ERRNO[-OB_ERR_KEY_DOES_NOT_EXISTS] = OB_ERR_KEY_DOES_NOT_EXISTS; SQLSTATE[-OB_ERR_KEY_DOES_NOT_EXISTS] = "42000"; STR_ERROR[-OB_ERR_KEY_DOES_NOT_EXISTS] = "key does not exist in table"; STR_USER_ERROR[-OB_ERR_KEY_DOES_NOT_EXISTS] = "Key '%.*s' doesn't exist in table '%.*s'"; @@ -13281,6 +13281,14 @@ static struct ObStrErrorInit { "ORA-02266: unique/primary keys in table referenced by enabled foreign keys"; ORACLE_STR_USER_ERROR[-OB_ERR_ATLER_TABLE_ILLEGAL_FK] = "ORA-02266: unique/primary keys in table referenced by enabled foreign keys"; + ERROR_NAME[-OB_IO_LIMIT] = "OB_IO_LIMIT"; + MYSQL_ERRNO[-OB_IO_LIMIT] = -1; + SQLSTATE[-OB_IO_LIMIT] = "HY000"; + STR_ERROR[-OB_IO_LIMIT] = "IO limit"; + STR_USER_ERROR[-OB_IO_LIMIT] = "IO limit"; + ORACLE_ERRNO[-OB_IO_LIMIT] = 600; + ORACLE_STR_ERROR[-OB_IO_LIMIT] = "ORA-00600: internal error code, arguments: -9061, IO limit"; + ORACLE_STR_USER_ERROR[-OB_IO_LIMIT] = "ORA-00600: internal error code, arguments: -9061, IO limit"; ERROR_NAME[-OB_NO_SUCH_FILE_OR_DIRECTORY] = "OB_NO_SUCH_FILE_OR_DIRECTORY"; MYSQL_ERRNO[-OB_NO_SUCH_FILE_OR_DIRECTORY] = -1; SQLSTATE[-OB_NO_SUCH_FILE_OR_DIRECTORY] = "HY000"; diff --git a/src/share/ob_errno.def b/src/share/ob_errno.def index ef982b7767..4299480c37 100644 --- a/src/share/ob_errno.def +++ b/src/share/ob_errno.def @@ -1520,6 +1520,7 @@ DEFINE_ERROR(OB_CANCEL_DELETE_BACKUP_NOT_ALLOWED, -9053, -1, "HY000", "cancel de DEFINE_ERROR(OB_BACKUP_CLEAN_INFO_NOT_EXIST, -9054, -1, "HY000", "backup clean info not exists"); DEFINE_ERROR_EXT(OB_ERR_DROP_TRUNCATE_PARTITION_REBUILD_INDEX, -9055, -1, "HY000", "rebuild global index failed when drop/truncate partitions", "rebuild global index:'%.*s' failed when drop/truncate partitions"); DEFINE_ORACLE_ERROR(OB_ERR_ATLER_TABLE_ILLEGAL_FK, -9056, -1, "HY000", "unique/primary keys in table referenced by enabled foreign keys", 02266, "unique/primary keys in table referenced by enabled foreign keys"); +DEFINE_ERROR(OB_IO_LIMIT, -9061, -1, "HY000", "IO limit"); //////////////////////////////////////////////////////////////// //error code for STORAGE3.0 -9100 ~ diff --git a/src/share/ob_errno.h b/src/share/ob_errno.h index 9f517e49a0..35c3967f28 100644 --- a/src/share/ob_errno.h +++ b/src/share/ob_errno.h @@ -18,6 +18,7 @@ namespace oceanbase { namespace common { + constexpr int OB_LAST_ERROR_CODE = -38105; constexpr int OB_ERR_SQL_START = -5000; constexpr int OB_ERR_SQL_END = -5999; @@ -654,7 +655,6 @@ constexpr int OB_ERR_WRONG_ROWID = -5337; constexpr int OB_ERR_INVALID_WINDOW_FUNCTION_PLACE = -5338; constexpr int OB_ERR_PARSE_PARTITION_LIST = -5339; constexpr int OB_ERR_MULTIPLE_DEF_CONST_IN_LIST_PART = -5340; -// OB_ERR_INVALID_HEX_NUMBER(5342) has been moved to deps/oblib/src/lib/ob_errno.h constexpr int OB_ERR_WRONG_FUNC_ARGUMENTS_TYPE = -5343; constexpr int OB_ERR_MULTI_UPDATE_KEY_CONFLICT = -5344; constexpr int OB_ERR_INSUFFICIENT_PX_WORKER = -5345; @@ -2779,6 +2779,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; #define OB_ERR_DROP_TRUNCATE_PARTITION_REBUILD_INDEX__USER_ERROR_MSG \ "rebuild global index:'%.*s' failed when drop/truncate partitions" #define OB_ERR_ATLER_TABLE_ILLEGAL_FK__USER_ERROR_MSG "unique/primary keys in table referenced by enabled foreign keys" +#define OB_IO_LIMIT__USER_ERROR_MSG "IO limit" #define OB_NO_SUCH_FILE_OR_DIRECTORY__USER_ERROR_MSG "no such file or directory" #define OB_FILE_OR_DIRECTORY_EXIST__USER_ERROR_MSG "file or directory already exist" #define OB_ERR_DUPLICATE_HAVING_CLAUSE_IN_TABLE_EXPRESSION__USER_ERROR_MSG "Duplicate having-clause in table expression" @@ -5184,6 +5185,7 @@ constexpr int OB_ERR_INVALID_DATE_MSG_FMT_V2 = -4219; "ORA-00600: internal error code, arguments: -9055, rebuild global index:'%.*s' failed when drop/truncate partitions" #define OB_ERR_ATLER_TABLE_ILLEGAL_FK__ORA_USER_ERROR_MSG \ "ORA-02266: unique/primary keys in table referenced by enabled foreign keys" +#define OB_IO_LIMIT__ORA_USER_ERROR_MSG "ORA-00600: internal error code, arguments: -9061, IO limit" #define OB_NO_SUCH_FILE_OR_DIRECTORY__ORA_USER_ERROR_MSG \ "ORA-00600: internal error code, arguments: -9100, no such file or directory" #define OB_FILE_OR_DIRECTORY_EXIST__ORA_USER_ERROR_MSG \ diff --git a/src/storage/ob_partition_group.cpp b/src/storage/ob_partition_group.cpp index 67b627544b..351f0cee27 100644 --- a/src/storage/ob_partition_group.cpp +++ b/src/storage/ob_partition_group.cpp @@ -4038,7 +4038,7 @@ int ObPartitionGroup::get_curr_clog_info_( if (OB_FAIL(get_base_storage_info_(clog_info))) { STORAGE_LOG(WARN, "fail to get base storage info", K(ret), K(pkey_)); } else if (0 == clog_info.get_last_replay_log_id()) { - // skip fetching log_archive_status + // do nothing } else if (ObServerConfig::get_instance().enable_log_archive && !is_sys_tenant && !is_restore && src_cluster_id == self_cluster_id) { // Only the requests from the database itself need to obtain the archive point if the archive is enabled.