From 80097f0752acb645c3be64e17410ada9e57db905 Mon Sep 17 00:00:00 2001 From: hzq Date: Wed, 6 Sep 2023 09:04:28 +0800 Subject: [PATCH] [fix](fragment mgr) Do not cancel queries when upgrading cluster. #23924 --- be/src/runtime/fragment_mgr.cpp | 37 +++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 4c8d4edf7e..8e347be172 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -991,22 +991,33 @@ void FragmentMgr::cancel_worker() { } const auto& running_fes = ExecEnv::GetInstance()->get_running_frontends(); - for (const auto& q : _query_ctx_map) { - auto itr = running_fes.find(q.second->coord_addr); - if (itr != running_fes.end()) { - // We use conservative strategy. - // 1. If same process uuid, do not cancel - // 2. If fe has zero process uuid, do not cancel - // 3. If query's process uuid is zero, do not cancel - if (q.second->get_fe_process_uuid() == itr->second.info.process_uuid || - itr->second.info.process_uuid == 0 || - q.second->get_fe_process_uuid() == 0) { + + // We use a very conservative cancel strategy. + // 0. If there are no running frontends, do not cancel any queries. + // 1. If query's process uuid is zero, do not cancel + // 2. If same process uuid, do not cancel + // 3. If fe has zero process uuid, do not cancel + if (running_fes.empty()) { + LOG_EVERY_N(WARNING, 10) + << "Could not find any running frontends, maybe we are upgrading? " + << "We will not cancel any running queries in this situation."; + } else { + for (const auto& q : _query_ctx_map) { + if (q.second->get_fe_process_uuid() == 0) { continue; } - } - // Coorninator of this query has already dead. - queries_to_cancel.push_back(q.first); + auto itr = running_fes.find(q.second->coord_addr); + if (itr != running_fes.end()) { + if (q.second->get_fe_process_uuid() == itr->second.info.process_uuid || + itr->second.info.process_uuid == 0) { + continue; + } + } + + // Coorninator of this query has already dead. + queries_to_cancel.push_back(q.first); + } } }