From b0ee07126bc8d27d4955fe5921a25cc48101f359 Mon Sep 17 00:00:00 2001 From: obdev Date: Fri, 29 Nov 2024 07:44:40 +0000 Subject: [PATCH] Add testing for the object storage IO retry mechanism --- .../src/lib/restore/ob_storage_oss_base.cpp | 5 ++++- .../src/lib/restore/ob_storage_s3_base.cpp | 2 +- .../src/lib/restore/ob_storage_s3_base.h | 6 +++++- .../oblib/src/lib/utility/ob_tracepoint_def.h | 1 + .../ob_admin_test_io_device_executor.cpp | 19 +++++++++++++++++-- 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp b/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp index 46be0fecd..6e350bf6e 100644 --- a/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp +++ b/deps/oblib/src/lib/restore/ob_storage_oss_base.cpp @@ -354,7 +354,10 @@ bool ObStorageOSSRetryStrategy::should_retry_impl_( const RetType &outcome, const int64_t attempted_retries) const { bool bret = false; - if (OB_ISNULL(outcome)) { + if (OB_SUCCESS != EventTable::EN_OBJECT_STORAGE_IO_RETRY) { + bret = true; + OB_LOG(INFO, "errsim object storage IO retry"); + } else if (OB_ISNULL(outcome)) { bret = false; } else if (aos_status_is_ok(outcome)) { bret = false; diff --git a/deps/oblib/src/lib/restore/ob_storage_s3_base.cpp b/deps/oblib/src/lib/restore/ob_storage_s3_base.cpp index c3d238e2f..d85fd1683 100644 --- a/deps/oblib/src/lib/restore/ob_storage_s3_base.cpp +++ b/deps/oblib/src/lib/restore/ob_storage_s3_base.cpp @@ -12,7 +12,6 @@ #include "lib/restore/ob_storage.h" #include "ob_storage_s3_base.h" -#include "lib/utility/ob_tracepoint.h" namespace oceanbase { namespace common @@ -2511,6 +2510,7 @@ int ObStorageS3MultiPartWriter::write_single_part_() data_stream->write(base_buf_, base_buf_pos_); data_stream->flush(); request.SetBody(data_stream); + request.SetContentLength(static_cast(request.GetBody()->tellp())); Aws::S3::Model::UploadPartOutcome outcome; if (OB_FAIL(set_request_checkusum_algorithm(request, checksum_type_))) { diff --git 
a/deps/oblib/src/lib/restore/ob_storage_s3_base.h b/deps/oblib/src/lib/restore/ob_storage_s3_base.h index 4fc58e5d3..c88e2610f 100644 --- a/deps/oblib/src/lib/restore/ob_storage_s3_base.h +++ b/deps/oblib/src/lib/restore/ob_storage_s3_base.h @@ -23,6 +23,7 @@ #include "lib/allocator/ob_vslice_alloc.h" #include #include +#include "lib/utility/ob_tracepoint.h" #pragma push_macro("private") #undef private @@ -323,7 +324,10 @@ protected: const RetType &outcome, const int64_t attempted_retries) const override { bool bret = false; - if (outcome.IsSuccess()) { + if (OB_SUCCESS != EventTable::EN_OBJECT_STORAGE_IO_RETRY) { + bret = true; + OB_LOG(INFO, "errsim object storage IO retry", K(outcome.IsSuccess())); + } else if (outcome.IsSuccess()) { bret = false; } else if (outcome.GetError().ShouldRetry()) { bret = true; diff --git a/deps/oblib/src/lib/utility/ob_tracepoint_def.h b/deps/oblib/src/lib/utility/ob_tracepoint_def.h index 5ea5b2fd8..360732940 100644 --- a/deps/oblib/src/lib/utility/ob_tracepoint_def.h +++ b/deps/oblib/src/lib/utility/ob_tracepoint_def.h @@ -599,5 +599,6 @@ GLOBAL_ERRSIM_POINT_DEF(2453, EN_ENABLE_NEW_RESULT_META_DATA, "For testing enabl GLOBAL_ERRSIM_POINT_DEF(2501, EN_CHECK_SORT_CMP, "Used to check the legality of the compare method for std::sort"); GLOBAL_ERRSIM_POINT_DEF(2502, EN_ENABLE_SHARED_STORAGE_COLUMN_GROUP, "whether to enable column group under shared storage mode"); +GLOBAL_ERRSIM_POINT_DEF(2503, EN_OBJECT_STORAGE_IO_RETRY, "whether to enable probability-based retries for object storage IO"); #endif /*GLOBAL_ERRSIM_POINT_DEF*/ diff --git a/tools/ob_admin/io_device/ob_admin_test_io_device_executor.cpp b/tools/ob_admin/io_device/ob_admin_test_io_device_executor.cpp index add2a21d8..6176a549b 100644 --- a/tools/ob_admin/io_device/ob_admin_test_io_device_executor.cpp +++ b/tools/ob_admin/io_device/ob_admin_test_io_device_executor.cpp @@ -81,12 +81,13 @@ int ObAdminTestIODeviceExecutor::parse_cmd_(int argc, char *argv[]) int ret = OB_SUCCESS; 
int opt = 0; int index = -1; - const char *opt_str = "h:d:s:q:e:"; + const char *opt_str = "h:d:s:q:e:f:"; struct option longopts[] = {{"help", 0, NULL, 'h'}, {"backup_path", 1, NULL, 'd'}, {"storage_info", 1, NULL, 's'}, {"quiet", 0, NULL, 'q' }, {"s3_url_encode_type", 0, NULL, 'e'}, + {"trigger_freq", 1, NULL, 'f'}, // used for internal testing only; has_arg is 1 because case 'f' reads optarg (matches "f:" in opt_str) {NULL, 0, NULL, 0}}; while (OB_SUCC(ret) && -1 != (opt = getopt_long(argc, argv, opt_str, longopts, &index))) { switch (opt) { @@ -120,6 +121,20 @@ int ObAdminTestIODeviceExecutor::parse_cmd_(int argc, char *argv[]) } break; } + case 'f': { + int tmp_ret = OB_SUCCESS; + int64_t trigger_freq = 0; + if (OB_TMP_FAIL(c_str_to_int(optarg, trigger_freq))) { + OB_LOG(WARN, "fail to parse trigger freq", KR(tmp_ret), K((char *)optarg)); + } else if (OB_UNLIKELY(trigger_freq < 0)) { + tmp_ret = OB_INVALID_ARGUMENT; + OB_LOG(WARN, "invalid trigger freq", KR(tmp_ret), K((char *)optarg)); + } else { + TP_SET_EVENT(EventTable::EN_OBJECT_STORAGE_IO_RETRY, + OB_OBJECT_STORAGE_IO_ERROR, 0, trigger_freq); + } + break; + } default: { print_usage_(); exit(1); @@ -791,4 +806,4 @@ int ObAdminTestIODeviceExecutor::print_usage_() } } //tools -} //oceanbase +} //oceanbase \ No newline at end of file