oceanbase/src/sql/executor/ob_distributed_scheduler.cpp

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_EXE
#include "sql/session/ob_sql_session_info.h"
#include "share/ob_thread_mgr.h"
#include "lib/ob_running_mode.h"
#include "sql/monitor/ob_exec_stat_collector.h"
#include "sql/executor/ob_distributed_scheduler.h"
#include "sql/executor/ob_distributed_job_executor.h"
#include "sql/executor/ob_distributed_task_executor.h"
#include "sql/executor/ob_local_job_executor.h"
#include "sql/executor/ob_local_task_executor.h"
#include "sql/executor/ob_executor_rpc_processor.h"
#include "sql/engine/ob_exec_context.h"
#include "sql/monitor/ob_exec_stat_collector.h"
#include "sql/executor/ob_mini_task_executor.h"
#include "observer/ob_server_struct.h"
using namespace oceanbase::common;
using namespace oceanbase::obrpc;
namespace oceanbase {
namespace sql {
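// ObDistributedScheduler drives the distributed execution framework: parser_
// splits a physical plan into jobs managed by job_control_, the root job runs
// locally on the query thread, and the remaining jobs are stepped forward by
// ObDistributedTaskExecutor on a scheduler-pool thread. Remote task-complete
// events come back through ObDistributedSchedulerManager (signal_scheduler /
// merge_trans_result), which maps execution ids to live scheduler instances.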
ObDistributedSchedulerManager* ObDistributedSchedulerManager::instance_ = NULL;
ObDistributedScheduler::ObDistributedScheduler()
: ObSqlScheduler(),
allocator_(ObModIds::OB_SQL_EXECUTOR_SCHEDULER),
execution_id_(OB_INVALID_ID),
finish_queue_(),
response_task_events_(),
lock_(),
spfactory_(),
job_control_(),
parser_(),
exec_stat_collector_(NULL),
trans_result_(),
should_stop_(false),
root_finish_(false),
sche_finish_(false),
sche_ret_(OB_SUCCESS),
can_serial_exec_(false),
sche_thread_started_(false),
scheduler_id_(0),
rpc_error_addrs_()
{}
ObDistributedScheduler::~ObDistributedScheduler()
{
for (int64_t i = 0; i < response_task_events_.count(); ++i) {
ObTaskCompleteEvent*& evt = response_task_events_.at(i);
if (NULL != evt) {
evt->~ObTaskCompleteEvent();
evt = NULL;
}
}
}
int ObDistributedScheduler::init()
{
int ret = OB_SUCCESS;
if (OB_FAIL(finish_queue_.init(MAX_FINISH_QUEUE_CAPACITY))) {
LOG_WARN("fail to init finish queue for all", K(ret), LITERAL_K(MAX_FINISH_QUEUE_CAPACITY));
}
return ret;
}
int ObDistributedScheduler::stop()
{
should_stop_ = true;
return OB_SUCCESS;
}
void ObDistributedScheduler::reset()
{
execution_id_ = OB_INVALID_ID;
for (int64_t i = 0; i < response_task_events_.count(); ++i) {
ObTaskCompleteEvent*& evt = response_task_events_.at(i);
if (NULL != evt) {
evt->~ObTaskCompleteEvent();
evt = NULL;
}
}
response_task_events_.reset();
// lock_.reset();
//==============don't change reset order=============
parser_.reset();
job_control_.reset();
spfactory_.reset();
//====================================================
exec_stat_collector_ = NULL;
trans_result_.reset();
should_stop_ = false;
finish_queue_.reset();
root_finish_ = false;
sche_finish_ = false;
sche_ret_ = OB_SUCCESS;
can_serial_exec_ = false;
sche_thread_started_ = false;
allocator_.reset();
scheduler_id_ = 0;
rpc_error_addrs_.reset();
}
int ObDistributedScheduler::merge_trans_result(const ObTaskCompleteEvent& task_event)
{
int ret = OB_SUCCESS;
if (OB_FAIL(
trans_result_.recv_result(task_event.get_task_location().get_ob_task_id(), task_event.get_trans_result()))) {
LOG_WARN("fail to merge trans result",
K(ret),
"task_id",
task_event.get_task_location().get_ob_task_id(),
"task_trans_result",
task_event.get_trans_result());
} else {
LOG_DEBUG("scheduler trans_result",
"task_id",
task_event.get_task_location().get_ob_task_id(),
"task_trans_result",
task_event.get_trans_result());
}
return ret;
}
int ObDistributedScheduler::set_task_status(const ObTaskID& task_id, ObTaskStatus status)
{
return trans_result_.set_task_status(task_id, status);
}
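// Called when a remote task reports completion (via
// ObDistributedSchedulerManager::signal_scheduler): the event is deep-copied
// into response_task_events_ under lock_, and its array index (encoded as a
// pointer via idx_to_ptr) is pushed into finish_queue_ for the scheduler thread
// to consume in pop_task_event_for_sche().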
int ObDistributedScheduler::signal_finish_queue(const ObTaskCompleteEvent& task_event)
{
int ret = OB_SUCCESS;
int64_t idx = -1;
void* ptr_casted_from_idx = NULL;
if (OB_UNLIKELY(!task_event.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("task event is invalid", K(ret), K(task_event));
} else {
void* evt_ptr = NULL;
ObTaskCompleteEvent* evt = NULL;
{
ObLockGuard<ObSpinLock> lock_guard(lock_);
if (OB_ISNULL(evt_ptr = allocator_.alloc(sizeof(ObTaskCompleteEvent)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc ObTaskCompleteEvent", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(evt = new (evt_ptr) ObTaskCompleteEvent())) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("fail to new ObTaskCompleteEvent", K(ret));
} else {
ObLockGuard<ObSpinLock> lock_guard(lock_);
if (OB_FAIL(evt->assign(allocator_, task_event))) {
LOG_WARN("fail to assign task event", K(ret));
evt->~ObTaskCompleteEvent();
evt = NULL;
}
}
if (OB_SUCC(ret)) {
ObLockGuard<ObSpinLock> lock_guard(lock_);
if (OB_FAIL(response_task_events_.push_back(evt))) {
LOG_WARN("fail to push back task event into response_task_events_ array", K(ret), K(evt), K(task_event));
evt->~ObTaskCompleteEvent();
evt = NULL;
} else {
idx = response_task_events_.count() - 1;
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_UNLIKELY(idx < 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("invalid idx, it is < 0", K(ret), K(idx));
} else if (OB_ISNULL(ptr_casted_from_idx = idx_to_ptr(idx))) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("ptr casted from idx is NULL", K(ret), K(idx));
} else {
int64_t finish_queue_size = finish_queue_.size();
if (OB_UNLIKELY(finish_queue_size > 128)) {
LOG_ERROR("finish queue size > 128", K(finish_queue_size));
}
if (OB_FAIL(finish_queue_.push(ptr_casted_from_idx))) {
LOG_WARN("fail to push task event into finish queue", K(ret), K(ptr_casted_from_idx), K(idx), K(task_event));
}
}
return ret;
}
int ObDistributedScheduler::atomic_push_err_rpc_addr(const common::ObAddr& addr)
{
int ret = OB_SUCCESS;
ObSpinLockGuard guard(lock_);
if (OB_FAIL(rpc_error_addrs_.push_back(addr))) {
LOG_WARN("fail to push addr", K(ret));
}
return ret;
}
int ObDistributedScheduler::pop_task_result_for_root(
ObExecContext& ctx, uint64_t root_op_id, ObTaskResult& task_result, int64_t timeout_timestamp)
{
int ret = OB_SUCCESS;
ObJob* job = NULL;
int64_t task_id = -1;
if (OB_FAIL(job_control_.find_job_by_root_op_id(root_op_id, job))) {
LOG_WARN("fail to find job", K(ret), K(root_op_id));
} else if (OB_ISNULL(job)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("job is null", K(ret));
} else if (OB_FAIL(pop_task_idx(ctx,
job->get_finish_queue(),
timeout_timestamp,
&ObDistributedScheduler::check_schedule_error,
task_id))) {
ObQueryRetryInfo& retry_info = ctx.get_scheduler_thread_ctx().get_scheduler_retry_info_for_update();
int tmp_ret = OB_SUCCESS;
if (OB_SUCCESS !=
(tmp_ret = ObMiniTaskExecutor::add_invalid_servers_to_retry_info(ret, rpc_error_addrs_, retry_info))) {
LOG_WARN("fail to add invalid servers to retry info", K(tmp_ret));
}
LOG_WARN("fail to pop task event idx", K(ret));
} else if (FALSE_IT(sche_finish_)) {
} else if (OB_FAIL(check_schedule_error())) {
ObQueryRetryInfo& retry_info = ctx.get_scheduler_thread_ctx().get_scheduler_retry_info_for_update();
int tmp_ret = OB_SUCCESS;
if (OB_SUCCESS !=
(tmp_ret = ObMiniTaskExecutor::add_invalid_servers_to_retry_info(ret, rpc_error_addrs_, retry_info))) {
LOG_WARN("fail to add invalid servers to retry info", K(tmp_ret));
}
LOG_WARN("schecule error", K(ret));
} else if (SCHE_ITER_END == task_id) {
ret = OB_ITER_END;
} else if (OB_FAIL(job->get_task_result(task_id, task_result))) {
LOG_WARN("fail to get task result", K(ret), K(task_id));
}
return ret;
}
int ObDistributedScheduler::pop_task_event_for_sche(
const ObExecContext& ctx, ObTaskCompleteEvent*& task_event, int64_t timeout_timestamp)
{
int ret = OB_SUCCESS;
int64_t task_event_idx = -1;
if (OB_FAIL(pop_task_idx(
ctx, finish_queue_, timeout_timestamp, &ObDistributedScheduler::check_root_finish, task_event_idx))) {
LOG_WARN("fail to pop task event idx", K(ret));
} else if (OB_FAIL(check_root_finish())) {
LOG_WARN("root finish", K(ret));
} else if (FALSE_IT(task_event_idx < 0)) {
} else if (OB_FAIL(get_task_event(task_event_idx, task_event))) {
LOG_WARN("fail to get task event", K(ret));
}
return ret;
}
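// Waits on `finish_queue` in slices of at most MAX_TIMEOUT_ONCE (100ms) so that
// the stop flag, session termination, the optional check_func and the statement
// timeout are all re-checked between waits; OB_ENTRY_NOT_EXIST from pop() means
// the queue was empty for this slice and the loop simply retries.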
int ObDistributedScheduler::pop_task_idx(const ObExecContext& ctx, ObLightyQueue& finish_queue,
int64_t timeout_timestamp, ObCheckStatus check_func, int64_t& task_idx)
{
int ret = OB_SUCCESS;
void* ptr_casted_from_idx = NULL;
static const int64_t MAX_TIMEOUT_ONCE = 100000; // 100ms
const ObSQLSessionInfo* session = ctx.get_my_session();
if (OB_ISNULL(session)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("session is NULL", K(ret));
} else {
bool is_pop_timeout = true;
int64_t wait_count = 0; // for debug purpose
while (is_pop_timeout) {
int64_t timeout_left = timeout_timestamp - ObTimeUtility::current_time();
is_pop_timeout = false;
if (OB_UNLIKELY(should_stop_)) {
ret = OB_ERR_INTERRUPTED;
LOG_WARN("distributed scheduler is interrupted, server is stopping",
K(ret),
K(wait_count),
K(timeout_left),
K(timeout_timestamp));
} else if (OB_UNLIKELY(session->is_terminate(ret))) {
LOG_WARN("query or session is killed", K(ret), K(wait_count), K(timeout_left), K(timeout_timestamp));
} else if (!OB_ISNULL(check_func) && OB_FAIL((this->*check_func)())) {
LOG_WARN("check some status to exit", K(wait_count), K(ret));
} else if (timeout_left <= 0) {
ret = OB_TIMEOUT;
LOG_WARN("fail pop task event from finish queue, timeout",
K(ret),
K(wait_count),
K(timeout_left),
K(timeout_timestamp));
} else {
int64_t timeout = MIN(timeout_left, MAX_TIMEOUT_ONCE);
ret = finish_queue.pop(ptr_casted_from_idx, timeout);
if (OB_ENTRY_NOT_EXIST == ret) {
is_pop_timeout = true;
ret = OB_SUCCESS;
}
if (OB_FAIL(ret)) {
LOG_WARN("fail pop task event from finish queue", K(ret), K(timeout_timestamp));
}
}
wait_count++;
}
}
if (OB_FAIL(ret)) {
} else {
task_idx = ptr_to_idx(ptr_casted_from_idx);
}
return ret;
}
int ObDistributedScheduler::get_task_event(int64_t task_event_idx, ObTaskCompleteEvent*& task_event)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(task_event_idx < 0 || task_event_idx >= response_task_events_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("invalid task event idx", K(ret), K(task_event_idx));
} else if (OB_ISNULL(task_event = response_task_events_.at(task_event_idx))) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task event is NULL", K(ret), K(task_event_idx));
} else if (OB_UNLIKELY(!task_event->is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("task event is invalid", K(ret), K(*task_event));
} else if (OB_UNLIKELY(!task_event->get_task_location().is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("task location is invalid", K(ret), K(*task_event));
}
return ret;
}
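// NOP_EVENT and SCHE_ITER_END are sentinel "indexes" pushed into finish queues:
// NOP_EVENT wakes a waiter without carrying a task event (root finished, or a
// schedule error was signalled to a job), while SCHE_ITER_END tells
// pop_task_result_for_root() that a job has produced all of its results.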
int ObDistributedScheduler::signal_root_finish()
{
root_finish_ = true;
return finish_queue_.push(idx_to_ptr(NOP_EVENT));
}
int ObDistributedScheduler::signal_job_iter_end(ObLightyQueue& finish_queue)
{
return finish_queue.push(idx_to_ptr(SCHE_ITER_END));
}
int ObDistributedScheduler::signal_schedule_error(int sche_ret)
{
int ret = OB_SUCCESS;
sche_ret_ = sche_ret;
ObArray<ObJob*> all_jobs;
if (OB_FAIL(job_control_.get_all_jobs(all_jobs))) {
LOG_WARN("fail to get all jobs", K(ret));
} else {
ObJob* job = NULL;
int signal_ret = OB_SUCCESS;
// root may wait on any finish queue in all jobs, so signal all jobs.
for (int64_t i = 0; OB_SUCC(ret) && i < all_jobs.count(); i++) {
// continue even if this job is NULL or something else goes wrong.
if (OB_ISNULL(job = all_jobs.at(i))) {
LOG_WARN("job is NULL", K(i));
} else if (OB_SUCCESS != (signal_ret = job->signal_schedule_error(NOP_EVENT))) {
LOG_WARN("fail to signal schedule error", K(signal_ret), K(i), K(*job));
}
}
}
return ret;
}
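// Blocks the scheduler thread until signal_root_finish() has set root_finish_,
// i.e. until the root operator has consumed all of its data; any entries still
// arriving on finish_queue_ are popped and discarded while waiting.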
int ObDistributedScheduler::wait_root_use_up_data(ObExecContext& ctx)
{
int ret = OB_SUCCESS;
static const int64_t MAX_TIMEOUT_STEP = 1000000; // 1s
ObPhysicalPlanCtx* plan_ctx = NULL;
void* ptr_casted_from_idx = NULL;
int64_t wait_count = 0;
if (OB_ISNULL(plan_ctx = ctx.get_physical_plan_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("plan ctx is NULL", K(ret));
}
while (OB_SUCC(ret) && !root_finish_) {
ret = OB_ENTRY_NOT_EXIST;
int64_t timeout_timestamp = plan_ctx->get_timeout_timestamp();
while (OB_ENTRY_NOT_EXIST == ret) {
int64_t timeout_left = timeout_timestamp - ObTimeUtility::current_time();
if (root_finish_) {
ret = OB_SUCCESS;
} else if (timeout_left <= 0) {
ret = OB_TIMEOUT;
LOG_WARN("fail pop task event from finish queue, timeout",
K(ret),
K(timeout_left),
K(timeout_timestamp),
K(root_finish_),
K(wait_count));
} else {
int64_t timeout = MIN(timeout_left, MAX_TIMEOUT_STEP);
ret = finish_queue_.pop(ptr_casted_from_idx, timeout);
}
}
if (!root_finish_) {
if (OB_FAIL(ret)) {
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_TIMEOUT;
}
LOG_WARN("fail pop task event from finish queue", K(ret), K(timeout_timestamp), K(wait_count));
} else {
int64_t task_idx = ptr_to_idx(ptr_casted_from_idx);
if (OB_UNLIKELY(NOP_EVENT == task_idx)) {
LOG_ERROR("root not finish, but can pop NOP_EVENT", K(ret), K(task_idx), K(root_finish_), K(wait_count));
}
}
}
wait_count++;
}
return ret;
}
int ObDistributedScheduler::signal_schedule_finish()
{
signal_can_serial_exec();
sche_finish_ = true;
sche_finish_cond_.signal();
return OB_SUCCESS;
}
int ObDistributedScheduler::wait_schedule_finish(/*int64_t timeout_timestamp*/)
{
int ret = OB_SUCCESS;
if (sche_thread_started_) {
int64_t wait_count = 0;
while (OB_SUCC(ret) && !sche_finish_) {
sche_finish_cond_.timedwait(1000000);
if (!sche_finish_) {
wait_count++;
LOG_INFO("wait for schedule finish", K(wait_count), K_(sche_finish));
}
}
}
return ret;
}
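// Serial-execution handshake: for plans that require serial execution the query
// thread blocks in wait_can_serial_exec() until the scheduler thread calls
// signal_can_serial_exec(), either directly or via signal_schedule_finish().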
int ObDistributedScheduler::signal_can_serial_exec()
{
can_serial_exec_ = true;
can_serial_exec_cond_.signal();
return OB_SUCCESS;
}
int ObDistributedScheduler::wait_can_serial_exec(ObExecContext& ctx, int64_t timeout_timestamp)
{
int ret = OB_SUCCESS;
static const int64_t MAX_TIMEOUT_ONCE = 100000; // 100ms
const ObSQLSessionInfo* session = ctx.get_my_session();
if (OB_ISNULL(session)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("session is NULL", K(ret));
} else {
int64_t wait_count = 0; // for debug purpose
while (OB_SUCC(ret) && !can_serial_exec_) {
int64_t timeout_left = timeout_timestamp - ObTimeUtility::current_time();
if (OB_UNLIKELY(should_stop_)) {
ret = OB_ERR_INTERRUPTED;
LOG_WARN("distributed scheduler is interrupted, server is stopping",
K(ret),
K(wait_count),
K(timeout_left),
K(timeout_timestamp));
} else if (OB_UNLIKELY(session->is_terminate(ret))) {
ret = OB_ERR_INTERRUPTED;
LOG_WARN("query or session is killed", K(ret), K(wait_count), K(timeout_left), K(timeout_timestamp));
} else if (OB_FAIL(check_schedule_error())) {
LOG_WARN("check some status to exit", K(wait_count), K(ret));
} else if (timeout_left <= 0) {
ret = OB_TIMEOUT;
LOG_WARN("fail pop task event from finish queue, timeout",
K(ret),
K(wait_count),
K(timeout_left),
K(timeout_timestamp));
} else {
int64_t timeout = MIN(timeout_left, MAX_TIMEOUT_ONCE);
can_serial_exec_cond_.timedwait(timeout);
}
wait_count++;
}
if (OB_SUCC(ret) && can_serial_exec_) {
ctx.merge_final_trans_result();
}
}
return ret;
}
int ObDistributedScheduler::parse_all_jobs_and_start_root_job(ObExecContext& ctx, ObPhysicalPlan* phy_plan)
{
int ret = OB_SUCCESS;
ObTaskExecutorCtx* task_exe_ctx = ctx.get_task_executor_ctx();
ObJob* root_job = NULL;
if (OB_ISNULL(task_exe_ctx) || OB_ISNULL(phy_plan)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("executor_ctx or phy_plan is NULL", K(ret), K(task_exe_ctx), K(phy_plan));
} else {
ObExecutionID ob_execution_id;
ob_execution_id.set_server(ObSqlExecutionIDMap::is_outer_id(execution_id_) ? ObExecutionID::global_id_addr()
: task_exe_ctx->get_self_addr());
ob_execution_id.set_execution_id(execution_id_);
if (OB_FAIL(parser_.parse_job(ctx, phy_plan, ob_execution_id, spfactory_, job_control_))) {
LOG_WARN("fail parse job for scheduler", K(ret));
} else if (OB_FAIL(job_control_.get_root_job(root_job))) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("fail get root job", K(ret));
} else {
ObLocalTaskExecutor local_task_executor;
ObLocalJobExecutor local_job_executor;
local_job_executor.set_task_executor(local_task_executor);
local_job_executor.set_job(*root_job);
if (OB_FAIL(local_job_executor.execute(ctx))) {
LOG_WARN("fail execute root job");
}
}
}
return ret;
}
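// Main scheduling loop, run on a scheduler-pool thread (see do_schedule()):
// each round fetches the ready jobs from job_control_, executes one distribution
// step per job, then blocks in pop_task_event_for_sche() for a task-complete
// event and updates the owning job's state. The loop exits with OB_ITER_END once
// every job has finished; on any other error the running jobs are killed and the
// failed partitions are reported back through the scheduler thread context.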
int ObDistributedScheduler::schedule(ObExecContext& ctx, ObPhysicalPlan* phy_plan)
{
UNUSED(phy_plan);
NG_TRACE(distributed_schedule_begin);
int ret = OB_SUCCESS;
scheduler_id_ = next_scheduler_id();
ObSchedulerThreadCtx& sche_thread_ctx = ctx.get_scheduler_thread_ctx();
ObPhysicalPlanCtx* plan_ctx = ctx.get_physical_plan_ctx();
ObSQLSessionInfo* my_session = ctx.get_my_session();
bool need_serial_exec = false;
const ObPhysicalPlan* phy_plan_ptr = NULL;
if (OB_ISNULL(plan_ctx) || OB_ISNULL(my_session) || OB_ISNULL(phy_plan_ptr = plan_ctx->get_phy_plan())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("plan_ctx is NULL", K(ret), K(plan_ctx));
} else {
need_serial_exec = (phy_plan_ptr->get_need_serial_exec() || my_session->need_serial_exec());
}
int kill_ret = OB_SUCCESS;
ObArray<ObJob*> ready_jobs;
ObDistributedTaskExecutor task_exe(scheduler_id_);
ObDistributedJobExecutor job_exe;
ObDistributedTaskExecutor notify_task_exe(scheduler_id_);
ObDistributedJobExecutor notify_job_exe;
task_exe.set_trans_result(&trans_result_);
notify_task_exe.set_trans_result(&trans_result_);
while (OB_SUCC(ret)) {
if (OB_FAIL(job_control_.get_ready_jobs(ready_jobs, need_serial_exec))) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_ERROR("fail get ready jobs", K(ret));
}
} else {
// not all finished, must continue running
LOG_DEBUG("ready jobs", K(ready_jobs), "count", ready_jobs.count());
for (int64_t i = 0; OB_SUCC(ret) && i < ready_jobs.count(); ++i) {
task_exe.reset();
job_exe.reset();
ObJob* ready_job = ready_jobs.at(i);
job_exe.set_task_executor(task_exe);
job_exe.set_job(*ready_job);
if (OB_FAIL(job_exe.execute_step(ctx))) {
if (OB_ITER_END != ret) {
LOG_WARN("fail execute task", K(ret));
}
}
}
if (OB_FAIL(ret) && OB_ITER_END != ret) {
if (OB_SUCCESS != (kill_ret = kill_all_jobs(ctx, job_control_))) {
LOG_WARN("fail to kill all jobs", K(kill_ret));
}
} else {
// wait for one task-complete event
ObTaskCompleteEvent* task_event = NULL;
if (OB_FAIL(pop_task_event_for_sche(ctx, task_event, plan_ctx->get_timeout_timestamp()))) {
LOG_WARN("fail to pop task event", K(ret));
} else if (OB_ISNULL(task_event)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task event is NULL", K(ret));
} else if (OB_FAIL(plan_ctx->merge_implicit_cursors(task_event->get_implicit_cursors()))) {
LOG_WARN("merge implicit cursor failed", K(ret), K(task_event->get_implicit_cursors()));
} else {
// receive a task event, then update job state
ObJob* notify_job = NULL;
ObTaskLocation task_loc = task_event->get_task_location();
bool job_finished = false;
if (OB_FAIL(job_control_.find_job_by_job_id(task_loc.get_job_id(), notify_job))) {
LOG_ERROR("can not find job", K(ret), K(task_loc.get_job_id()));
} else if (OB_ISNULL(notify_job)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("notify job is NULL", K(ret), K(task_loc.get_job_id()));
} else if (OB_FAIL(notify_job->update_job_state(ctx, *task_event, job_finished))) {
LOG_WARN("fail to update job state", K(task_loc.get_job_id()), K(task_loc.get_task_id()), K(ret));
} else if (OB_FAIL(static_cast<int>(task_event->get_err_code()))) {
if (OB_ERR_TASK_SKIPPED == ret) {
ret = OB_SUCCESS;
} else {
LOG_WARN("task event error code is not success or skipped", K(ret), K(*task_event));
}
}
if (OB_FAIL(ret)) {
} else if (job_finished) {
signal_job_iter_end(notify_job->get_finish_queue());
} else if (!notify_job->all_tasks_run()) {
notify_task_exe.reset();
notify_job_exe.reset();
notify_job_exe.set_task_executor(notify_task_exe);
notify_job_exe.set_job(*notify_job);
if (OB_FAIL(notify_job_exe.execute_step(ctx))) {
LOG_WARN("fail to executor step", K(ret), K(*notify_job));
}
}
}
}
}
}
if (OB_FAIL(ret) && OB_UNLIKELY(OB_ITER_END != ret)) {
int fail_ret = OB_SUCCESS;
ObSEArray<ObTaskInfo*, 32> last_failed_task_infos;
if (OB_SUCCESS != (fail_ret = job_control_.get_last_failed_task_infos(last_failed_task_infos))) {
LOG_WARN("fail to get last failed task infos", K(ret), K(fail_ret));
}
if (last_failed_task_infos.count() > 0 &&
(fail_ret = sche_thread_ctx.init_last_failed_partition(last_failed_task_infos.count())) != OB_SUCCESS) {
LOG_WARN("init last failed partition info failed", K(ret), K(last_failed_task_infos.count()));
}
for (int64_t i = 0; OB_SUCCESS == fail_ret && i < last_failed_task_infos.count(); ++i) {
ObTaskInfo* last_failed_task_info = last_failed_task_infos.at(i);
if (OB_ISNULL(last_failed_task_info)) {
fail_ret = OB_ERR_UNEXPECTED;
LOG_WARN("last_failed_task_info is NULL", K(ret), K(fail_ret), K(i));
} else if (OB_SUCCESS !=
(fail_ret = sche_thread_ctx.add_last_failed_partition(last_failed_task_info->get_range_location()))) {
LOG_WARN("fail to add last failed partition", K(ret), K(fail_ret), K(i), K(*last_failed_task_info));
}
}
if (OB_SUCCESS != fail_ret) {
sche_thread_ctx.clear_last_failed_partitions();
}
fail_ret = OB_SUCCESS;
const static int64_t MAX_JC_STATUS_BUF_LEN = 4096;
char jc_status_buf[MAX_JC_STATUS_BUF_LEN];
if (OB_SUCCESS != (fail_ret = job_control_.print_status(jc_status_buf, MAX_JC_STATUS_BUF_LEN, true))) {
LOG_WARN("fail to print job control status", K(ret), K(fail_ret), LITERAL_K(MAX_JC_STATUS_BUF_LEN));
} else {
LOG_WARN("fail to schedule, print jobs' status", K(ret), K(fail_ret), "jobs_status", jc_status_buf);
}
if (OB_SUCCESS != (fail_ret = signal_schedule_error(ret))) {
LOG_WARN("fail to signal schedule error", K(fail_ret));
}
}
NG_TRACE(distributed_schedule_end);
return ret;
}
int ObDistributedScheduler::close_all_results(ObExecContext& ctx)
{
int ret = OB_SUCCESS;
ObSEArray<ObJob*, 2> all_jobs_except_root_job;
if (OB_FAIL(job_control_.get_all_jobs_except_root_job(all_jobs_except_root_job))) {
LOG_WARN("fail get all jobs");
} else {
int close_ret = OB_SUCCESS;
ObDistributedTaskExecutor task_exe(scheduler_id_);
ObDistributedJobExecutor job_exe;
for (int64_t i = 0; OB_SUCC(ret) && i < all_jobs_except_root_job.count(); ++i) {
ObJob* job = all_jobs_except_root_job.at(i);
if (OB_ISNULL(job)) {
LOG_ERROR("job is NULL", K(ret), K(i));
} else {
task_exe.reset();
job_exe.reset();
job_exe.set_task_executor(task_exe);
job_exe.set_job(*job);
if (OB_SUCCESS != (close_ret = job_exe.close_all_results(ctx))) {
LOG_WARN("fail close all results", K(close_ret), K(ret), K(i), K(*job));
}
}
}
}
return ret;
}
int ObDistributedScheduler::kill_all_jobs(ObExecContext& ctx, ObJobControl& jc)
{
int ret = OB_SUCCESS;
ObArray<ObJob*> running_jobs;
if (OB_FAIL(jc.get_running_jobs(running_jobs))) {
LOG_WARN("fail to get running jobs", K(ret));
} else {
ObDistributedTaskExecutor distributed_task_executor(scheduler_id_);
ObDistributedJobExecutor distributed_job_executor;
for (int64_t i = 0; OB_SUCC(ret) && i < running_jobs.count(); ++i) {
ObJob* running_job = running_jobs.at(i);
if (OB_ISNULL(running_job)) {
LOG_WARN("running job is NULL", K(ret), K(i));
} else {
int kill_ret = OB_SUCCESS;
distributed_task_executor.reset();
distributed_job_executor.reset();
distributed_job_executor.set_task_executor(distributed_task_executor);
distributed_job_executor.set_job(*running_job);
if (OB_SUCCESS != (kill_ret = distributed_job_executor.kill_job(ctx))) {
LOG_WARN("fail to kill job", K(kill_ret), K(ret), K(i), K(*running_job));
}
}
}
}
return ret;
}
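// Generates a process-wide, non-zero scheduler id; 0 is reserved to mean
// "no scheduler id given" in signal_scheduler() / signal_schedule_error().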
uint64_t ObDistributedScheduler::next_scheduler_id()
{
static volatile uint64_t g_scheduler_id = 0;
uint64_t id = 0;
while (0 == (id = ATOMIC_AAF(&g_scheduler_id, 1))) {}
return id;
}
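// Thread-pool entry for lib::TGDefIDs::SqlDistSched: restores the submitting
// thread's trace id and forwards the queued ObDistributedSchedulerCtx to
// ObDistributedSchedulerManager::do_schedule().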
void ObSchedulerThreadPool::handle(void* task)
{
int ret = OB_SUCCESS;
ObDistributedSchedulerManager* scheduler_mgr = NULL;
ObDistributedSchedulerCtx* scheduler_ctx = NULL;
const uint64_t* trace_id = NULL;
ObDistributedExecContext dis_exec_ctx(GCTX.session_mgr_);
if (OB_ISNULL(scheduler_mgr = ObDistributedSchedulerManager::get_instance())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("scheduler manager is NULL", K(ret));
} else if (OB_ISNULL(scheduler_ctx = static_cast<ObDistributedSchedulerCtx*>(task))) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("scheduler ctx is null", K(ret));
} else if (OB_ISNULL(trace_id = scheduler_ctx->trace_id_)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("trace id is null", K(ret), K(trace_id));
} else {
ObCurTraceId::set(trace_id);
if (OB_FAIL(scheduler_mgr->do_schedule(*scheduler_ctx, dis_exec_ctx))) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("fail to schedule", K(ret));
}
}
}
return;
}
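// ObDistributedSchedulerHolder is a small RAII guard around an execution_id_map_
// slot: init() records the scheduler fetched from the map and reset() (also run
// by the destructor) reverts the reference so the slot can later be reclaimed by
// free_scheduler().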
ObDistributedSchedulerManager::ObDistributedSchedulerHolder::ObDistributedSchedulerHolder()
: inited_(false), execution_id_(OB_INVALID_ID), scheduler_(NULL), execution_id_map_(NULL)
{}
ObDistributedSchedulerManager::ObDistributedSchedulerHolder::~ObDistributedSchedulerHolder()
{
reset();
}
void ObDistributedSchedulerManager::ObDistributedSchedulerHolder::reset()
{
if (inited_) {
if (OB_ISNULL(execution_id_map_)) {
LOG_ERROR("execution_id_map_ is NULL", K(execution_id_), K(scheduler_));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id_)) {
LOG_ERROR("execution_id_ is invalid", K(execution_id_map_), K(scheduler_));
} else {
execution_id_map_->revert(execution_id_);
}
inited_ = false;
execution_id_ = OB_INVALID_ID;
scheduler_ = NULL;
execution_id_map_ = NULL;
}
}
int ObDistributedSchedulerManager::ObDistributedSchedulerHolder::init(
ObDistributedScheduler* scheduler, uint64_t execution_id, ExecutionIDMap& execution_id_map)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(inited_)) {
ret = OB_INIT_TWICE;
LOG_ERROR("init twice", K(ret), K(execution_id_), K(execution_id));
} else if (OB_ISNULL(scheduler)) {
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("scheduler is NULL", K(ret), K(scheduler), K(execution_id));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("execution_id is invalid", K(ret), K(scheduler), K(execution_id));
} else {
inited_ = true;
execution_id_ = execution_id;
scheduler_ = scheduler;
execution_id_map_ = &execution_id_map;
}
return ret;
}
int ObDistributedSchedulerManager::ObDistributedSchedulerHolder::get_scheduler(ObDistributedScheduler*& scheduler)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_ERROR("scheduler holder not init", K(ret), K(execution_id_), K(scheduler_));
} else if (OB_ISNULL(scheduler_)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("scheduler_ is NULL", K(ret), K(execution_id_), K(scheduler_));
} else {
scheduler = scheduler_;
}
return ret;
}
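// Applied to every live execution id when the manager stops (see stop(), which
// traverses execution_id_map_ with this functor): it fetches the scheduler and
// sets its should_stop_ flag so waiting loops exit with OB_ERR_INTERRUPTED.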
void ObDistributedSchedulerManager::ObDistributedSchedulerKiller::operator()(const uint64_t execution_id)
{
int ret = OB_SUCCESS;
ObDistributedSchedulerManager* scheduler_mgr = ObDistributedSchedulerManager::get_instance();
ObDistributedScheduler* scheduler = NULL;
if (OB_ISNULL(scheduler_mgr)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("scheduler manager is NULL", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("invalid execution id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = scheduler_mgr->execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map, maybe it has been deleted", K(ret), K(execution_id));
} else {
if (OB_FAIL(scheduler->stop())) {
LOG_WARN("fail to stop scheduler", K(ret), K(execution_id));
}
scheduler_mgr->execution_id_map_.revert(execution_id);
}
}
ObDistributedSchedulerManager::ObDistributedSchedulerManager()
: inited_(false), execution_id_map_(), is_stopping_(false), distributed_scheduler_killer_()
{}
ObDistributedSchedulerManager::~ObDistributedSchedulerManager()
{}
void ObDistributedSchedulerManager::reset()
{
inited_ = false;
execution_id_map_.destroy();
is_stopping_ = false;
distributed_scheduler_killer_.reset();
}
int ObDistributedSchedulerManager::build_instance()
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(NULL != instance_)) {
ret = OB_INIT_TWICE;
LOG_ERROR("instance is not NULL, build twice", K(ret));
} else if (OB_UNLIKELY(NULL == (instance_ = OB_NEW(ObDistributedSchedulerManager, ObModIds::OB_SQL_EXECUTOR)))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("instance is NULL, unexpected", K(ret));
} else if (OB_FAIL(instance_->init())) {
OB_DELETE(ObDistributedSchedulerManager, ObModIds::OB_SQL_EXECUTOR, instance_);
instance_ = NULL;
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("fail to init distributed scheduler", K(ret));
} else {
// empty
}
return ret;
}
ObDistributedSchedulerManager* ObDistributedSchedulerManager::get_instance()
{
ObDistributedSchedulerManager* instance = NULL;
if (OB_ISNULL(instance_) || OB_UNLIKELY(!instance_->inited_)) {
LOG_ERROR("instance is NULL or not inited", K(instance_));
} else {
instance = instance_;
}
return instance;
}
int ObDistributedSchedulerManager::init()
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(inited_)) {
ret = OB_INIT_TWICE;
LOG_WARN("distributed scheduler manager init twice", K(ret));
} else if (OB_FAIL(execution_id_map_.init(!lib::is_mini_mode() ? DEFAULT_ID_MAP_SIZE : MINI_MODE_ID_MAP_SIZE))) {
LOG_WARN("fail to init execution id map", K(ret), LITERAL_K(DEFAULT_ID_MAP_SIZE));
} else if (OB_FAIL(TG_SET_HANDLER_AND_START(lib::TGDefIDs::SqlDistSched, scheduler_pool_))) {
LOG_WARN("fail to init scheduler pool", K(ret));
} else {
inited_ = true;
}
return ret;
}
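// Creates the scheduler for one query execution and registers it in
// execution_id_map_. If the exec context already carries an externally assigned
// ("outer") execution id, that id is reused via assign_external_id(); otherwise
// the map allocates a fresh one. The execution id is what remote task-complete
// events use to find this scheduler again.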
int ObDistributedSchedulerManager::alloc_scheduler(ObExecContext& ctx, uint64_t& execution_id)
{
int ret = OB_SUCCESS;
UNUSED(ctx);
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(ATOMIC_LOAD(&is_stopping_))) {
ret = OB_SERVER_IS_STOPPING;
LOG_WARN("server is stopping", K(ret));
} else {
const uint64_t outer_id = ctx.get_execution_id();
execution_id = OB_INVALID_ID;
if (OB_ISNULL(scheduler = OB_NEW(ObDistributedScheduler, ObModIds::OB_SQL_EXECUTOR_SCHEDULER))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("fail allocate memory for distributed scheduler", K(ret));
} else if (OB_FAIL(scheduler->init())) {
OB_DELETE(ObDistributedScheduler, ObModIds::OB_SQL_EXECUTOR_SCHEDULER, scheduler);
LOG_WARN("fail to init scheduler", K(ret));
} else if (OB_UNLIKELY(ATOMIC_LOAD(&is_stopping_))) {
OB_DELETE(ObDistributedScheduler, ObModIds::OB_SQL_EXECUTOR_SCHEDULER, scheduler);
ret = OB_SERVER_IS_STOPPING;
LOG_WARN("server is stopping", K(ret));
} else {
if (OB_INVALID_ID != outer_id && ObSqlExecutionIDMap::is_outer_id(outer_id)) {
if (OB_FAIL(execution_id_map_.assign_external_id(outer_id, scheduler))) {
LOG_WARN("assign id to idmap failed", K(ret));
} else {
execution_id = outer_id;
}
} else {
if (OB_FAIL(execution_id_map_.assign(scheduler, execution_id))) {
LOG_WARN("fail set scheduler to map", K(ret));
}
}
if (OB_FAIL(ret)) {
OB_DELETE(ObDistributedScheduler, ObModIds::OB_SQL_EXECUTOR_SCHEDULER, scheduler);
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("scheduler id is invalid", K(ret), K(execution_id));
} else {
scheduler->set_execution_id(execution_id);
}
}
}
return ret;
}
int ObDistributedSchedulerManager::close_scheduler(ObExecContext& ctx, uint64_t execution_id)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid execution id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
if (OB_FAIL(scheduler->signal_root_finish())) {
LOG_WARN("signal root finish failed", K(ret));
}
if (OB_FAIL(scheduler->wait_schedule_finish())) {
LOG_WARN("wait schedule finish failed", K(ret));
}
execution_id_map_.revert(execution_id);
int merge_ret = OB_SUCCESS;
if (OB_UNLIKELY(OB_SUCCESS != (merge_ret = ctx.merge_scheduler_info()))) {
LOG_WARN("fail to merge scheduler_retry info", K(ret), K(merge_ret));
}
}
return ret;
}
int ObDistributedSchedulerManager::free_scheduler(uint64_t execution_id)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid execution id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id, FM_MUTEX_BLOCK))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
OB_DELETE(ObDistributedScheduler, ObModIds::OB_SQL_EXECUTOR_SCHEDULER, scheduler);
execution_id_map_.revert(execution_id, true);
}
return ret;
}
int ObDistributedSchedulerManager::get_scheduler(uint64_t execution_id, ObDistributedSchedulerHolder& scheduler_holder)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid execution id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else if (OB_FAIL(scheduler_holder.init(scheduler, execution_id, execution_id_map_))) {
LOG_WARN("fail to init scheduler holder", K(ret), K(execution_id));
}
return ret;
}
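// Runs on the query thread: parses the plan into jobs and starts the root job
// locally, then serializes the exec context into a buffer and queues an
// ObDistributedSchedulerCtx to the SqlDistSched thread pool, whose handle()
// eventually calls do_schedule(). For plans that need serial execution it also
// blocks here until the scheduler thread allows serial execution to proceed.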
int ObDistributedSchedulerManager::parse_jobs_and_start_sche_thread(
uint64_t execution_id, ObExecContext& ctx, ObPhysicalPlan* phy_plan, int64_t timeout_timestamp)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
ObExecutorRpcImpl* exec_rpc = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid execution id", K(ret), K(execution_id));
} else if (OB_ISNULL(phy_plan)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("phy plan is NULL", K(ret));
} else if (OB_FAIL(ObTaskExecutorCtxUtil::get_task_executor_rpc(ctx, exec_rpc))) {
LOG_WARN("get task executor rpc failed", K(ret));
} else if (OB_ISNULL(ctx.get_my_session())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("session is NULL", K(ret));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler", K(ret), K(execution_id));
} else {
if (OB_FAIL(scheduler->parse_all_jobs_and_start_root_job(ctx, phy_plan))) {
LOG_WARN("fail to parse all jobs", K(ret), K(execution_id));
} else if (OB_FAIL(scheduler->init_trans_result(*ctx.get_my_session(), exec_rpc))) {
LOG_WARN("fail to init trans result", K(ret), K(execution_id));
} else {
scheduler->set_exec_stat_collector(&ctx.get_exec_stat_collector());
}
execution_id_map_.revert(execution_id);
if (OB_FAIL(ret)) {
} else {
ObDistributedSchedulerCtx* scheduler_ctx =
static_cast<ObDistributedSchedulerCtx*>(ctx.get_allocator().alloc(sizeof(ObDistributedSchedulerCtx)));
int64_t buf_len = ctx.get_serialize_size();
char* exec_ctx_buf = static_cast<char*>(ctx.get_allocator().alloc(buf_len));
ObPhysicalPlanCtx* phy_plan_ctx = ctx.get_physical_plan_ctx();
ObSQLSessionInfo* my_session = ctx.get_my_session();
int64_t pos = 0;
if (OB_ISNULL(scheduler_ctx) || OB_ISNULL(exec_ctx_buf) || OB_ISNULL(my_session)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("some value is null", K(ret), K(scheduler_ctx), K(exec_ctx_buf), K(execution_id));
} else if (OB_ISNULL(phy_plan_ctx)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("physical plan context is null", K(ret), K(phy_plan_ctx), K(execution_id));
} else if (OB_FAIL(ctx.serialize(exec_ctx_buf, buf_len, pos))) {
LOG_WARN("fail to serialize exec ctx", K(ret));
} else {
scheduler_ctx->trace_id_ = ObCurTraceId::get();
scheduler_ctx->execution_id_ = execution_id;
scheduler_ctx->exec_ctx_ = &ctx;
scheduler_ctx->exec_ctx_buf_ = exec_ctx_buf;
scheduler_ctx->buf_len_ = buf_len;
ctx.get_scheduler_thread_ctx().set_build_index_plan(stmt::T_BUILD_INDEX_SSTABLE == phy_plan->get_stmt_type());
ctx.get_scheduler_thread_ctx().set_plain_select_stmt(phy_plan_ctx->is_plain_select_stmt());
if (OB_FAIL(TG_PUSH_TASK(lib::TGDefIDs::SqlDistSched, static_cast<void*>(scheduler_ctx)))) {
LOG_WARN("fail to start scheduler", K(ret), K(execution_id));
ret = (OB_EAGAIN == ret) ? OB_ERR_SCHEDULER_THREAD_NOT_ENOUGH : ret;
} else if (FALSE_IT(scheduler->set_sche_thread_started(true))) {
} else if ((phy_plan->get_need_serial_exec() || my_session->need_serial_exec()) &&
OB_FAIL(scheduler->wait_can_serial_exec(ctx, timeout_timestamp))) {
LOG_WARN("fail to wait can serial exec", K(ret), K(execution_id));
}
}
}
}
return ret;
}
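// Runs on a SqlDistSched pool thread: deserializes the exec context snapshot
// into dis_exec_ctx, switches to the session's compatibility mode and drives
// scheduler->schedule(). Whatever the schedule result, it then waits for all
// tasks, allows serial execution to proceed, waits for the root operator to use
// up its data, closes intermediate results (unless they are reusable) and
// finally signals schedule-finish so close_scheduler()/wait_schedule_finish()
// can return.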
int ObDistributedSchedulerManager::do_schedule(
ObDistributedSchedulerCtx& sched_ctx, ObDistributedExecContext& dis_exec_ctx)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
ObPhysicalPlanCtx* plan_ctx = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_ERROR("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == sched_ctx.execution_id_)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("execution id is invalid", K(ret), K(sched_ctx.execution_id_));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(sched_ctx.execution_id_))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_ERROR("fail to fetch scheduler", K(ret), K(sched_ctx.execution_id_));
} else {
ObExecContext* exec_ctx = NULL;
char* exec_ctx_buf = NULL;
int64_t buf_len = 0;
int64_t pos = 0;
ObPhysicalPlanCtx* dis_plan_ctx = nullptr;
if (OB_ISNULL(exec_ctx = sched_ctx.exec_ctx_) || OB_ISNULL(exec_ctx_buf = sched_ctx.exec_ctx_buf_) ||
(buf_len = sched_ctx.buf_len_) <= 0 || OB_ISNULL(plan_ctx = sched_ctx.exec_ctx_->get_physical_plan_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("exec ctx or exec ctx buf is invalid",
K(ret),
K(exec_ctx),
"exec ctx buf",
sched_ctx.exec_ctx_buf_,
"buf len",
sched_ctx.buf_len_);
} else if (OB_FAIL(dis_exec_ctx.deserialize(exec_ctx_buf, buf_len, pos))) {
LOG_WARN("fail to deserialize exec ctx", K(ret));
} else if (OB_ISNULL(dis_plan_ctx = dis_exec_ctx.get_physical_plan_ctx())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("distributed plan ctx is null", K(ret));
} else if (OB_FAIL(dis_plan_ctx->assign_batch_stmt_param_idxs(plan_ctx->get_batched_stmt_param_idxs()))) {
LOG_WARN("assign batch stmt param idxs failed", K(ret));
} else if (NULL == dis_exec_ctx.get_my_session()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("session is NULL", K(ret));
} else if (OB_FAIL(dis_exec_ctx.get_part_row_manager().assign(sched_ctx.exec_ctx_->get_part_row_manager()))) {
LOG_WARN("assign part row manager failed", K(ret));
} else if (FALSE_IT(exec_ctx->get_scheduler_thread_ctx().set_dis_exec_ctx(&dis_exec_ctx))) {
} else {
ObWorkerSessionGuard worker_session_guard(dis_exec_ctx.get_my_session());
ObSQLSessionInfo::LockGuard lock_guard(dis_exec_ctx.get_my_session()->get_query_lock());
// Only switch the compatibility mode; do not switch the tenant context via WITH_CONTEXT,
// because WITH_CONTEXT will fail if the tenant has no unit on this server, which can
// happen for global index building scheduled by RS.
share::CompatModeGuard compat_mode_guard(ORACLE_MODE == dis_exec_ctx.get_my_session()->get_compatibility_mode()
? share::ObWorker::CompatMode::ORACLE
: share::ObWorker::CompatMode::MYSQL);
if (OB_FAIL(scheduler->schedule(*exec_ctx, NULL))) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("fail to do schedule", K(ret), K(sched_ctx.execution_id_));
}
}
}
int wait_ret = OB_SUCCESS;
int close_ret = OB_SUCCESS;
int signal_ret = OB_SUCCESS;
if (OB_FAIL(ret) && OB_UNLIKELY(OB_ITER_END != ret)) {
if (OB_SUCCESS != (signal_ret = scheduler->signal_schedule_error(ret))) {
LOG_WARN("fail to signal schedule error", K(signal_ret));
}
}
if (!OB_ISNULL(plan_ctx)) {
if (OB_SUCCESS != (wait_ret = scheduler->wait_all_task(plan_ctx->get_timeout_timestamp(),
sched_ctx.exec_ctx_->get_scheduler_thread_ctx().is_build_index_plan()))) {
LOG_WARN("wait all task failed", K(wait_ret));
}
}
if (OB_SUCCESS != (signal_ret = scheduler->signal_can_serial_exec())) {
LOG_WARN("fail to signal can serial exec", K(signal_ret));
}
if (OB_SUCCESS != (wait_ret = scheduler->wait_root_use_up_data(*exec_ctx))) {
LOG_WARN("fail to wait root use up data", K(wait_ret), K(sched_ctx.execution_id_));
}
// Remove interm results only after all tasks have finished,
// to make sure no interm result is added after the removal.
if (NULL != exec_ctx && !exec_ctx->is_reusable_interm_result()) {
if (OB_UNLIKELY(OB_SUCCESS != (close_ret = scheduler->close_all_results(*exec_ctx)))) {
LOG_WARN("fail to close all results", K(close_ret), K(sched_ctx.execution_id_));
}
}
if (OB_LIKELY(NULL != exec_ctx)) {
exec_ctx->get_scheduler_thread_ctx().set_dis_exec_ctx(NULL);
}
if (OB_SUCCESS != (signal_ret = scheduler->signal_schedule_finish())) {
LOG_WARN("fail to signal schedule finish", K(signal_ret), K(sched_ctx.execution_id_));
}
execution_id_map_.revert(sched_ctx.execution_id_);
}
return ret;
}
int ObDistributedSchedulerManager::collect_extent_info(ObTaskCompleteEvent& task_event)
{
int ret = OB_SUCCESS;
ObTaskLocation task_loc;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(!task_event.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task event is invalid", K(ret), K(task_event));
} else if (OB_UNLIKELY(!(task_loc = task_event.get_task_location()).is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task location is invalid", K(ret), K(task_loc));
} else {
uint64_t execution_id = task_loc.get_execution_id();
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid scheduler id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
ObExecStatCollector* collector = scheduler->get_exec_stat_collector();
if (NULL != collector) {
int cret = collector->add_raw_stat(task_event.get_extend_info());
if (OB_SUCCESS != cret) {
LOG_DEBUG("fail add raw stat to stat collector", K(cret));
}
}
execution_id_map_.revert(execution_id);
}
}
return ret;
}
int ObDistributedSchedulerManager::signal_schedule_error(
uint64_t execution_id, int sched_ret, const ObAddr addr, const uint64_t scheduler_id /* = 0 */)
{
int ret = OB_SUCCESS;
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid scheduler id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
if (0 != scheduler_id && scheduler_id != scheduler->get_scheduler_id()) {
LOG_WARN("scheduler id mismatch, may be retry of index building, ignore",
K(execution_id),
K(scheduler_id),
K(scheduler->get_scheduler_id()));
} else {
if (addr.is_valid() && OB_RPC_CONNECT_ERROR == sched_ret && OB_FAIL(scheduler->atomic_push_err_rpc_addr(addr))) {
LOG_WARN("fail to atomic push err rpc addr", K(ret));
} else if (OB_FAIL(scheduler->signal_schedule_error(sched_ret))) {
LOG_ERROR("fail signal error to scheduler, may block scheduler thread", K(sched_ret), K(ret));
}
}
execution_id_map_.revert(execution_id);
}
return ret;
}
int ObDistributedSchedulerManager::signal_scheduler(
ObTaskCompleteEvent& task_event, const uint64_t scheduler_id /* = 0 */)
{
int ret = OB_SUCCESS;
ObTaskLocation task_loc;
ObDistributedScheduler* scheduler = NULL;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(!task_event.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task event is invalid", K(ret), K(task_event));
} else if (OB_UNLIKELY(!(task_loc = task_event.get_task_location()).is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task location is invalid", K(ret), K(task_loc));
} else {
uint64_t execution_id = task_loc.get_execution_id();
if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid scheduler id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
if (0 != scheduler_id && scheduler_id != scheduler->get_scheduler_id()) {
LOG_WARN("scheduler id mismatch, may be retry of index building, ignore",
K(task_loc),
K(scheduler_id),
K(scheduler->get_scheduler_id()));
} else {
if (OB_FAIL(scheduler->signal_finish_queue(task_event))) {
LOG_WARN("fail to signal finish queue", K(ret), K(task_event));
}
}
execution_id_map_.revert(execution_id);
}
}
return ret;
}
int ObDistributedSchedulerManager::merge_trans_result(const ObTaskCompleteEvent& task_event)
{
int ret = OB_SUCCESS;
ObTaskLocation task_loc;
ObDistributedScheduler* scheduler = NULL;
/**
* TODO:
* we can refactor collect_extent_info() / signal_scheduler() / merge_trans_result()
* later by adding a function get_execution_id() for their common code.
 * but remember not to move execution_id_map_.fetch() into get_execution_id():
 * fetch() acts like a lock operation that must be paired with an unlock by calling
 * execution_id_map_.revert().
*/
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("manager not init", K(ret));
} else if (OB_UNLIKELY(!task_event.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task event is invalid", K(ret), K(task_event));
} else if (OB_UNLIKELY(!(task_loc = task_event.get_task_location()).is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("task location is invalid", K(ret), K(task_loc));
} else {
uint64_t execution_id = task_loc.get_execution_id();
if (OB_UNLIKELY(OB_INVALID_ID == execution_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid scheduler id", K(ret), K(execution_id));
} else if (OB_ISNULL(scheduler = execution_id_map_.fetch(execution_id))) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("fail to fetch scheduler from id map", K(ret), K(execution_id));
} else {
if (OB_FAIL(scheduler->merge_trans_result(task_event))) {
LOG_WARN("fail to merge trans result", K(ret), K(task_event));
}
execution_id_map_.revert(execution_id);
}
}
return ret;
}
int ObDistributedSchedulerManager::set_task_status(const ObTaskID& task_id, ObTaskStatus status)
{
int ret = OB_SUCCESS;
uint64_t execution_id = task_id.get_execution_id();
ObDistributedScheduler* scheduler = NULL;
OV(inited_, OB_NOT_INIT);
OV(execution_id != OB_INVALID_ID, OB_INVALID_ARGUMENT);
OV(OB_NOT_NULL(scheduler = execution_id_map_.fetch(execution_id)), OB_INVALID_ARGUMENT, execution_id);
OZ(scheduler->set_task_status(task_id, status));
if (OB_NOT_NULL(scheduler)) {
// must revert if fetch succeeded.
execution_id_map_.revert(execution_id);
}
return ret;
}
int ObDistributedSchedulerManager::stop()
{
if (OB_UNLIKELY(ATOMIC_LOAD(&is_stopping_))) {
LOG_WARN("server is already stopping, do nothing");
} else {
ATOMIC_STORE(&is_stopping_, true);
execution_id_map_.traverse(distributed_scheduler_killer_);
}
return OB_SUCCESS;
}
} // namespace sql
} // namespace oceanbase