diff --git a/src/clog/ob_clog_mgr.cpp b/src/clog/ob_clog_mgr.cpp index e32a19935..91a5882ef 100644 --- a/src/clog/ob_clog_mgr.cpp +++ b/src/clog/ob_clog_mgr.cpp @@ -3170,12 +3170,12 @@ int ObCLogMgr::get_election_group_priority(const uint64_t tenant_id, election::O OBSERVER.get_gctx().rs_server_status_ == share::RSS_IS_WORKING; int tmp_ret = OB_SUCCESS; bool is_data_disk_error = false; - bool is_clog_disk_error = log_engine_.is_clog_disk_error(); + bool is_clog_disk_hang = log_engine_.is_clog_disk_hang(); if (OB_SUCCESS != (tmp_ret = ObIOManager::get_instance().is_disk_error(is_data_disk_error))) { CLOG_LOG(WARN, "is_data_disk_error failed", K(tmp_ret)); } - if (is_clog_disk_error) { - priority.set_system_clog_disk_error(); + if (is_clog_disk_hang) { + priority.set_system_clog_disk_hang(); } if (is_data_disk_error) { priority.set_system_data_disk_error(); diff --git a/src/clog/ob_clog_writer.cpp b/src/clog/ob_clog_writer.cpp index 314014205..225266623 100644 --- a/src/clog/ob_clog_writer.cpp +++ b/src/clog/ob_clog_writer.cpp @@ -37,6 +37,7 @@ namespace clog { ObCLogWriter::ObCLogWriter() : is_started_(false), is_disk_error_(false), + is_disk_hang_(false), file_mutex_(), file_writer_(NULL), type_(INVALID_WRITE_POOL), @@ -111,6 +112,7 @@ void ObCLogWriter::destroy() info_getter_ = NULL; tail_ = NULL; is_disk_error_ = false; + is_disk_hang_ = false; is_started_ = false; } @@ -134,34 +136,31 @@ file_id_t ObCLogWriter::get_file_id() const return (NULL == file_writer_) ? 0 : file_writer_->get_cur_file_id(); } -bool ObCLogWriter::is_disk_error() const +bool ObCLogWriter::is_disk_hang() const { - bool b_ret = ATOMIC_LOAD(&is_disk_error_); - if (!b_ret && nullptr != file_writer_) { - b_ret = file_writer_->is_write_hang(); - } - return b_ret; + bool is_disk_hang = ATOMIC_LOAD(&is_disk_hang_); + return is_disk_hang; } -int ObCLogWriter::set_is_disk_error() +int ObCLogWriter::set_is_disk_hang() { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited())) { ret = OB_NOT_INIT; } else { - ATOMIC_STORE(&is_disk_error_, true); + ATOMIC_STORE(&is_disk_hang_, true); CLOG_LOG(WARN, "clog disk may be hang or something error has happen!"); } return ret; } -int ObCLogWriter::reset_is_disk_error() +int ObCLogWriter::reset_is_disk_hang() { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited())) { ret = OB_NOT_INIT; } else { - ATOMIC_STORE(&is_disk_error_, false); + ATOMIC_STORE(&is_disk_hang_, false); CLOG_LOG(TRACE, "reset clog disk status to normal"); } return ret; @@ -241,7 +240,7 @@ void ObCLogWriter::process_log_items(common::ObIBaseLogItem** items, const int64 do { // TODO: flush log will not return OB_TIMEOUT, other IO error will be treated as bug if (OB_FAIL(file_writer_->flush(info_getter_, log_cache_, tail_, flush_start_offset))) { - set_is_disk_error(); + is_disk_error_ = true; // flush log to disk until die when IO hang, other IO error will be treated as bug if (OB_TIMEOUT == ret && REACH_TIME_INTERVAL(60 * 1000 * 1000)) { ret = OB_IO_ERROR; @@ -250,7 +249,7 @@ void ObCLogWriter::process_log_items(common::ObIBaseLogItem** items, const int64 CLOG_LOG(ERROR, "Fail to flush clog to disk, ", K(ret)); } } else { - reset_is_disk_error(); + is_disk_error_ = false; } } while (!has_stoped() && OB_TIMEOUT == ret); } @@ -386,8 +385,8 @@ int ObCLogDiskErrorCB::callback() storage::ObPartitionService& partition_service = storage::ObPartitionService::get_instance(); if (OB_ISNULL(host_)) { ret = OB_ERR_UNEXPECTED; - } else if (OB_FAIL(host_->set_is_disk_error())) { - CLOG_LOG(ERROR, "ObCLogDiskErrorCB set_is_disk_error failed", K(ret)); + } else if (OB_FAIL(host_->set_is_disk_hang())) { + CLOG_LOG(ERROR, "ObCLogDiskErrorCB set_is_disk_hang failed", K(ret)); } else if (OB_FAIL(partition_service.try_revoke_all_leader(ObElection::RevokeType::CLOG_DISK_HANG))) { CLOG_LOG(ERROR, "ObCLogDiskErrorCB try_revoke_all_leader failed", K(ret)); } @@ -400,7 +399,7 @@ void ObCLogDiskErrorCB::destroy() // If disk has real error, may cause is_disk_error be false, // however, observer will be killed. if (OB_NOT_NULL(host_)) { - (void)host_->reset_is_disk_error(); + (void)host_->reset_is_disk_hang(); } } diff --git a/src/clog/ob_clog_writer.h b/src/clog/ob_clog_writer.h index 6cff04226..3fada8511 100644 --- a/src/clog/ob_clog_writer.h +++ b/src/clog/ob_clog_writer.h @@ -65,9 +65,13 @@ public: virtual void destroy(); int switch_file(); file_id_t get_file_id() const; - bool is_disk_error() const; - int set_is_disk_error(); - int reset_is_disk_error(); + bool is_disk_hang() const; + inline bool is_disk_error() const + { + return true == is_disk_error_; + } + int set_is_disk_hang(); + int reset_is_disk_hang(); protected: virtual void process_log_items(common::ObIBaseLogItem** items, const int64_t item_cnt, int64_t& finish_cnt); @@ -99,6 +103,7 @@ private: int inner_switch_file(); bool is_started_; bool is_disk_error_; + bool is_disk_hang_; lib::ObMutex file_mutex_; ObCLogBaseFileWriter* file_writer_; ObLogWritePoolType type_; diff --git a/src/clog/ob_i_log_engine.h b/src/clog/ob_i_log_engine.h index 9efe11a11..134c6b51c 100644 --- a/src/clog/ob_i_log_engine.h +++ b/src/clog/ob_i_log_engine.h @@ -198,7 +198,7 @@ public: virtual int check_is_clog_obsoleted(const common::ObPartitionKey& partition_key, const file_id_t file_id, const offset_t offset, bool& is_obsoleted) const = 0; - virtual bool is_clog_disk_error() const = 0; + virtual bool is_clog_disk_hang() const = 0; // ================== interface for ObIlogStorage end ==================== }; diff --git a/src/clog/ob_log_engine.cpp b/src/clog/ob_log_engine.cpp index 00bee1771..7baeecb82 100644 --- a/src/clog/ob_log_engine.cpp +++ b/src/clog/ob_log_engine.cpp @@ -2553,16 +2553,16 @@ int ObLogEngine::get_ilog_using_disk_space(int64_t& space) const return ret; } -bool ObLogEngine::is_clog_disk_error() const +bool ObLogEngine::is_clog_disk_hang() const { - bool is_disk_error = false; - const ObCommitLogEnv* env = get_clog_env_(); + bool is_disk_hang = false; + const ObCommitLogEnv *env = get_clog_env_(); if (IS_NOT_INIT) { - is_disk_error = false; + is_disk_hang = false; } else if (OB_LIKELY(NULL != env)) { - is_disk_error = (env->get_writer()).is_disk_error(); + is_disk_hang = (env->get_writer()).is_disk_hang(); } - return is_disk_error; + return is_disk_hang; } NetworkLimitManager::NetworkLimitManager() : is_inited_(false), addr_array_(), ethernet_speed_(0), hash_map_() diff --git a/src/clog/ob_log_engine.h b/src/clog/ob_log_engine.h index 43c14b600..840b56960 100644 --- a/src/clog/ob_log_engine.h +++ b/src/clog/ob_log_engine.h @@ -531,10 +531,9 @@ public: int check_is_clog_obsoleted(const common::ObPartitionKey& partition_key, const file_id_t file_id, const offset_t offset, bool& is_obsoleted) const override; - // ================== interface for ObIlogStorage end ==================== - int get_clog_using_disk_space(int64_t& space) const; - int get_ilog_using_disk_space(int64_t& space) const; - bool is_clog_disk_error() const override; + int get_clog_using_disk_space(int64_t &space) const; + int get_ilog_using_disk_space(int64_t &space) const; + bool is_clog_disk_hang() const; private: int fetch_log_from_server( diff --git a/src/clog/ob_partition_log_service.cpp b/src/clog/ob_partition_log_service.cpp index e063f954a..1e2e2d493 100644 --- a/src/clog/ob_partition_log_service.cpp +++ b/src/clog/ob_partition_log_service.cpp @@ -3908,7 +3908,8 @@ int ObPartitionLogService::on_get_election_priority(election::ObElectionPriority #endif bool is_tenant_out_of_mem = is_tenant_out_of_memory_(); bool is_data_disk_error = false; - bool is_clog_disk_error = log_engine_->is_clog_disk_error(); + bool is_disk_space_enough = log_engine_->is_disk_space_enough(); + bool is_clog_disk_hang = log_engine_->is_clog_disk_hang(); const ObReplicaProperty replica_property = mm_.get_replica_property(); const uint64_t log_id = sw_.get_max_confirmed_log_id(); if (OB_SUCCESS != (tmp_ret = ObIOManager::get_instance().is_disk_error(is_data_disk_error))) { @@ -3933,8 +3934,11 @@ int ObPartitionLogService::on_get_election_priority(election::ObElectionPriority if (OB_SUCCESS != (ret = priority.init(is_candidate, mm_.get_timestamp(), log_id, zone_priority))) { CLOG_LOG(WARN, "priority init error", K_(partition_key), K(ret)); } else { - if (is_clog_disk_error) { - priority.set_system_clog_disk_error(); + if (!is_disk_space_enough) { + priority.set_system_disk_full(); + } + if (is_clog_disk_hang) { + priority.set_system_clog_disk_hang(); } if (is_data_disk_error) { priority.set_system_data_disk_error(); @@ -6702,7 +6706,7 @@ int ObPartitionLogService::check_is_normal_partition(bool& is_normal_partition) bool is_out_of_memory = false; bool is_disk_not_enough = false; bool is_disk_error = false; - bool is_clog_disk_error = false; + bool is_clog_disk_hang = false; bool is_archive_restoring = false; if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -6712,13 +6716,13 @@ int ObPartitionLogService::check_is_normal_partition(bool& is_normal_partition) } else if (OB_FAIL(ObIOManager::get_instance().is_disk_error(is_disk_error))) { CLOG_LOG(ERROR, "is_disk_error failed", K(ret), K(partition_key_)); } else { - is_clog_disk_error = log_engine_->is_clog_disk_error(); + is_clog_disk_hang = log_engine_->is_clog_disk_hang(); is_disk_not_enough = !log_engine_->is_disk_space_enough(); // physical restoring replica cannot participate in member change. // because its election module has not been started, it cannot vote. is_archive_restoring = restore_mgr_.is_archive_restoring(); is_normal_partition = - !(is_disk_not_enough || is_out_of_memory || is_disk_error || is_clog_disk_error || is_archive_restoring); + !(is_disk_not_enough || is_out_of_memory || is_disk_error || is_clog_disk_hang || is_archive_restoring); } return ret; } diff --git a/src/election/ob_election_group_priority.cpp b/src/election/ob_election_group_priority.cpp index 4b77e77bf..f81cafbc1 100644 --- a/src/election/ob_election_group_priority.cpp +++ b/src/election/ob_election_group_priority.cpp @@ -64,9 +64,9 @@ int ObElectionGroupPriority::compare(const ObElectionGroupPriority& priority) co return ret; } -void ObElectionGroupPriority::set_system_clog_disk_error() +void ObElectionGroupPriority::set_system_clog_disk_hang() { - system_score_ += SYSTEM_SCORE_CLOG_DISK_ERROR * 100; + system_score_ += SYSTEM_SCORE_CLOG_DISK_HANG * 100; } void ObElectionGroupPriority::set_system_data_disk_error() diff --git a/src/election/ob_election_group_priority.h b/src/election/ob_election_group_priority.h index df6de3d68..1da2a2634 100644 --- a/src/election/ob_election_group_priority.h +++ b/src/election/ob_election_group_priority.h @@ -48,7 +48,7 @@ public: { return system_score_; } - void set_system_clog_disk_error(); + void set_system_clog_disk_hang(); void set_system_data_disk_error(); void set_system_service_not_started(); @@ -59,7 +59,7 @@ public: DECLARE_TO_STRING_AND_YSON; private: - const static int64_t SYSTEM_SCORE_CLOG_DISK_ERROR = (1 << 6); + const static int64_t SYSTEM_SCORE_CLOG_DISK_HANG = (1 << 6); const static int64_t SYSTEM_SCORE_DATA_DISK_ERROR = (1 << 4); const static int64_t SYSTEM_SCORE_SERVICE_NOT_STARTED = (1 << 1); diff --git a/src/election/ob_election_priority.cpp b/src/election/ob_election_priority.cpp index 55c3c8611..0c77bf33e 100644 --- a/src/election/ob_election_priority.cpp +++ b/src/election/ob_election_priority.cpp @@ -197,9 +197,14 @@ bool ObElectionPriority::is_in_election_blacklist() const return (system_score_ / 100) & SYSTEM_SCORE_IN_ELECTION_BLACKLIST; } -void ObElectionPriority::set_system_clog_disk_error() +void ObElectionPriority::set_system_disk_full() { - system_score_ += SYSTEM_SCORE_CLOG_DISK_ERROR * 100; + system_score_ += SYSTEM_SCORE_DISK_FULL * 100; +} + +void ObElectionPriority::set_system_clog_disk_hang() +{ + system_score_ += SYSTEM_SCORE_CLOG_DISK_HANG * 100; } void ObElectionPriority::set_system_tenant_out_of_memory() diff --git a/src/election/ob_election_priority.h b/src/election/ob_election_priority.h index 1a3b134c3..cdaf6eac9 100644 --- a/src/election/ob_election_priority.h +++ b/src/election/ob_election_priority.h @@ -76,7 +76,8 @@ public: } int64_t get_system_score_without_election_blacklist() const; bool is_in_election_blacklist() const; - void set_system_clog_disk_error(); + void set_system_disk_full(); + void set_system_clog_disk_hang(); void set_system_tenant_out_of_memory(); void set_system_data_disk_error(); void set_system_need_rebuild(); @@ -96,8 +97,10 @@ private: int compare_(const ObElectionPriority& priority, const bool with_locality, const bool with_log_id) const; private: - const static int64_t SYSTEM_SCORE_CLOG_DISK_ERROR = (1 << 6); - const static int64_t SYSTEM_SCORE_TENANT_OUT_OF_MEM = (1 << 5); + const static int64_t SYSTEM_SCORE_DISK_FULL = (1 << 8); + const static int64_t SYSTEM_SCORE_NON_FULL_REPLICA = (1 << 7); + const static int64_t SYSTEM_SCORE_CLOG_DISK_HANG = (1 << 6); + const static int64_t SYSTEM_SCORE_TENANT_OUT_OF_MEM = (1 << 5); // tenant memstore is full const static int64_t SYSTEM_SCORE_DATA_DISK_ERROR = (1 << 4); const static int64_t SYSTEM_SCORE_NEED_REBUILD = (1 << 3); const static int64_t SYSTEM_SCORE_IN_ELECTION_BLACKLIST = (1 << 2);