Fix PX infinite retry loop in PL, and clear interrupt status at the beginning of the thread
This commit is contained in:
		 qianchanger
					qianchanger
				
			
				
					committed by
					
						 LINxiansheng
						LINxiansheng
					
				
			
			
				
	
			
			
			 LINxiansheng
						LINxiansheng
					
				
			
						parent
						
							3cec53aaec
						
					
				
				
					commit
					5ff640d9b8
				
			| @ -474,6 +474,13 @@ int ObInnerSQLConnection::process_retry( | |||||||
|     THIS_WORKER.sched_wait(); |     THIS_WORKER.sched_wait(); | ||||||
|     usleep(static_cast<unsigned int>(sleep_time_us)); |     usleep(static_cast<unsigned int>(sleep_time_us)); | ||||||
|     THIS_WORKER.sched_run(); |     THIS_WORKER.sched_run(); | ||||||
|  |   } else if (repeatable_stmt && is_scheduler_thread_not_enough_err(last_ret)) { | ||||||
|  |     const int64_t sleep_time_us = 10 * 1000; // 10ms | ||||||
|  |     need_retry = true; | ||||||
|  |     LOG_WARN("scheduler thread not enough, need retry", K(ret), K(last_ret), K(retry_cnt)); | ||||||
|  |     THIS_WORKER.sched_wait(); | ||||||
|  |     usleep(static_cast<unsigned int>(sleep_time_us)); | ||||||
|  |     THIS_WORKER.sched_run(); | ||||||
|   } |   } | ||||||
|   if (get_session().is_nested_session()) { |   if (get_session().is_nested_session()) { | ||||||
|     /** |     /** | ||||||
|  | |||||||
| @ -20,6 +20,7 @@ | |||||||
| #include "lib/stat/ob_session_stat.h" | #include "lib/stat/ob_session_stat.h" | ||||||
| #include "share/config/ob_server_config.h" | #include "share/config/ob_server_config.h" | ||||||
| #include "sql/engine/px/ob_px_admission.h" | #include "sql/engine/px/ob_px_admission.h" | ||||||
|  | #include "share/interrupt/ob_global_interrupt_call.h" | ||||||
| #include "ob_th_worker.h" | #include "ob_th_worker.h" | ||||||
| #include "ob_worker_pool.h" | #include "ob_worker_pool.h" | ||||||
| #include "ob_multi_tenant.h" | #include "ob_multi_tenant.h" | ||||||
| @ -133,8 +134,9 @@ void ObPxPool::run1() | |||||||
|   if (OB_LIKELY(nullptr != pm)) { |   if (OB_LIKELY(nullptr != pm)) { | ||||||
|     pm->set_tenant_ctx(tenant_id_, common::ObCtxIds::WORK_AREA); |     pm->set_tenant_ctx(tenant_id_, common::ObCtxIds::WORK_AREA); | ||||||
|   } |   } | ||||||
|  |   CLEAR_INTERRUPTABLE(); | ||||||
|   ObCgroupCtrl* cgroup_ctrl = GCTX.cgroup_ctrl_; |   ObCgroupCtrl* cgroup_ctrl = GCTX.cgroup_ctrl_; | ||||||
|   LOG_INFO("XXXXX: run px pool", K(group_id_), K(tenant_id_)); |   LOG_INFO("run px pool", K(group_id_), K(tenant_id_)); | ||||||
|   if (nullptr != cgroup_ctrl && OB_LIKELY(cgroup_ctrl->is_valid())) { |   if (nullptr != cgroup_ctrl && OB_LIKELY(cgroup_ctrl->is_valid())) { | ||||||
|     pid_t pid = static_cast<pid_t>(syscall(__NR_gettid)); |     pid_t pid = static_cast<pid_t>(syscall(__NR_gettid)); | ||||||
|     cgroup_ctrl->add_thread_to_cgroup(tenant_id_, group_id_, pid); |     cgroup_ctrl->add_thread_to_cgroup(tenant_id_, group_id_, pid); | ||||||
|  | |||||||
| @ -392,6 +392,7 @@ void ObThWorker::worker(int64_t& tenant_id, int64_t& req_recv_timestamp, int32_t | |||||||
|               ret = pm->set_tenant_ctx(tenant_->id(), ObCtxIds::DEFAULT_CTX_ID); |               ret = pm->set_tenant_ctx(tenant_->id(), ObCtxIds::DEFAULT_CTX_ID); | ||||||
|             } |             } | ||||||
|           } |           } | ||||||
|  |           CLEAR_INTERRUPTABLE(); | ||||||
|           set_th_worker_thread_name(tenant_->id()); |           set_th_worker_thread_name(tenant_->id()); | ||||||
|           lib::ContextTLOptGuard guard(true); |           lib::ContextTLOptGuard guard(true); | ||||||
|           lib::ContextParam param; |           lib::ContextParam param; | ||||||
|  | |||||||
| @ -97,6 +97,14 @@ ObGlobalInterruptManager* ObGlobalInterruptManager::getInstance() | |||||||
|   return instance_; |   return instance_; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Resets this checker's interrupted_ flag to false. | ||||||
|  | // A positive ref_count_ at this point is reported as an error, but the | ||||||
|  | // flag is still cleared afterwards. | ||||||
|  | void ObInterruptChecker::clear_interrupt_status() | ||||||
|  | { | ||||||
|  |   if (ref_count_ > 0) { | ||||||
|  |     LIB_LOG(ERROR, "invalid interrupt ref count"); | ||||||
|  |   } | ||||||
|  |   interrupted_ = false; | ||||||
|  | } | ||||||
|  |  | ||||||
| int ObGlobalInterruptManager::init(const common::ObAddr& local, ObInterruptRpcProxy* rpc_proxy) | int ObGlobalInterruptManager::init(const common::ObAddr& local, ObInterruptRpcProxy* rpc_proxy) | ||||||
| { | { | ||||||
|   int ret = OB_SUCCESS; |   int ret = OB_SUCCESS; | ||||||
|  | |||||||
| @ -124,6 +124,7 @@ public: | |||||||
|   void clear_status(); |   void clear_status(); | ||||||
|  |  | ||||||
|   void interrupt(ObInterruptCode& interrupt_code); |   void interrupt(ObInterruptCode& interrupt_code); | ||||||
|  |   void clear_interrupt_status(); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|   /* |   /* | ||||||
| @ -339,6 +340,15 @@ OB_INLINE void UNSET_INTERRUPTABLE(const ObInterruptibleTaskID& tid) | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Clears the interrupt status on the checker returned by get_checker(). | ||||||
|  | // If no checker is available, an error is logged and nothing is cleared. | ||||||
|  | OB_INLINE void CLEAR_INTERRUPTABLE() | ||||||
|  | { | ||||||
|  |   if (OB_ISNULL(get_checker())) { | ||||||
|  |     LIB_LOG(ERROR, "interrupt checker may not be set correctly"); | ||||||
|  |     return; | ||||||
|  |   } | ||||||
|  |   get_checker()->clear_interrupt_status(); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // end namespace common | } // end namespace common | ||||||
| } // end namespace oceanbase | } // end namespace oceanbase | ||||||
|  |  | ||||||
|  | |||||||
| @ -70,8 +70,7 @@ int ObPxAdmission::enter_query_admission( | |||||||
|       LOG_WARN("fail check query status", K(ret)); |       LOG_WARN("fail check query status", K(ret)); | ||||||
|     } else if (!ObPxAdmission::admit(req_worker_count, admit_worker_count)) { |     } else if (!ObPxAdmission::admit(req_worker_count, admit_worker_count)) { | ||||||
|       plan.inc_delayed_px_querys(); |       plan.inc_delayed_px_querys(); | ||||||
|       THIS_WORKER.set_retry_flag(); |       ret = OB_ERR_SCHEDULER_THREAD_NOT_ENOUGH; | ||||||
|       ret = OB_EAGAIN; |  | ||||||
|       LOG_INFO("It's a px query, out of px worker resource, " |       LOG_INFO("It's a px query, out of px worker resource, " | ||||||
|                "need delay, do not need disconnect", |                "need delay, do not need disconnect", | ||||||
|           K(req_worker_count), |           K(req_worker_count), | ||||||
|  | |||||||
| @ -101,12 +101,7 @@ OB_INLINE int ObResultSet::open_plan() | |||||||
|       if (OB_FAIL(ObPxAdmission::enter_query_admission( |       if (OB_FAIL(ObPxAdmission::enter_query_admission( | ||||||
|               my_session_, get_exec_context(), *get_physical_plan(), worker_count_))) { |               my_session_, get_exec_context(), *get_physical_plan(), worker_count_))) { | ||||||
|         // query is not admitted to run |         // query is not admitted to run | ||||||
|         if (OB_EAGAIN == ret) { |  | ||||||
|           ret = OB_ERR_SCHEDULER_THREAD_NOT_ENOUGH; |  | ||||||
|         LOG_DEBUG("Query is not admitted to run, try again", K(ret)); |         LOG_DEBUG("Query is not admitted to run, try again", K(ret)); | ||||||
|         } else { |  | ||||||
|           LOG_WARN("Fail to get admission to use px", K(ret)); |  | ||||||
|         } |  | ||||||
|       } else if (THIS_WORKER.is_timeout()) { |       } else if (THIS_WORKER.is_timeout()) { | ||||||
|         ret = OB_TIMEOUT; |         ret = OB_TIMEOUT; | ||||||
|         LOG_WARN("query is timeout", |         LOG_WARN("query is timeout", | ||||||
| @ -1108,7 +1103,6 @@ void ObResultSet::refresh_location_cache(ObTaskExecutorCtx &task_exec_ctx, bool | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| // obmp_query中重试整个SQL之前,可能需要调用本接口来刷新Location,以避免总是发给了错误的服务器 |  | ||||||
| int ObResultSet::refresh_location_cache(bool is_nonblock) | int ObResultSet::refresh_location_cache(bool is_nonblock) | ||||||
| { | { | ||||||
|   return ObTaskExecutorCtxUtil::refresh_location_cache(get_exec_context().get_task_exec_ctx(), is_nonblock); |   return ObTaskExecutorCtxUtil::refresh_location_cache(get_exec_context().get_task_exec_ctx(), is_nonblock); | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user