[Fix] check data completement task before execute inner sql and kill session when task is cancel

This commit is contained in:
obdev
2022-11-28 06:05:54 +00:00
committed by ob-robot
parent d874825952
commit 038db1917d
39 changed files with 1472 additions and 200 deletions

View File

@ -27,6 +27,7 @@
#include "storage/tablet/ob_tablet.h"
#include "storage/tx_storage/ob_ls_handle.h"
#include "storage/tx_storage/ob_ls_map.h"
#include "rootserver/ob_root_service.h"
using namespace oceanbase::share;
using namespace oceanbase::common;
@ -707,7 +708,7 @@ int ObDDLUtil::get_tablet_leader_addr(
}
// Used in offline ddl to delete all checksum record in __all_ddl_checksum
// DELETE FROM __all_ddl_checksum WHERE
// DELETE FROM __all_ddl_checksum WHERE
int ObDDLUtil::clear_ddl_checksum(ObPhysicalPlan *phy_plan)
{
int ret = OB_SUCCESS;
@ -779,3 +780,614 @@ bool ObDDLUtil::need_remote_write(const int ret_code)
|| OB_LS_LOCATION_LEADER_NOT_EXIST == ret_code
|| OB_EAGAIN == ret_code;
}
int ObDDLUtil::get_tablet_paxos_member_list(
const uint64_t tenant_id,
const common::ObTabletID &tablet_id,
common::ObIArray<common::ObAddr> &paxos_server_list,
int64_t &paxos_member_count)
{
int ret = OB_SUCCESS;
ObLSLocation location;
paxos_member_count = 0;
if (OB_INVALID_TENANT_ID == tenant_id || !tablet_id.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get tablet replica location, invalid id", K(ret), K(tenant_id), K(tablet_id));
} else if (OB_FAIL(get_tablet_replica_location(tenant_id, tablet_id, location))) {
LOG_WARN("fail to get tablet replica location", K(tenant_id), K(tablet_id), K(ret));
} else {
const ObIArray<ObLSReplicaLocation> &ls_locations = location.get_replica_locations();
for (int64_t i = 0; i < ls_locations.count() && OB_SUCC(ret); ++i) {
common::ObReplicaType replica_type = ls_locations.at(i).get_replica_type();
if (REPLICA_TYPE_FULL != replica_type && REPLICA_TYPE_LOGONLY != replica_type) {
continue;
}
paxos_member_count++;
if (REPLICA_TYPE_FULL == replica_type) { // paxos replica
const ObAddr &server = ls_locations.at(i).get_server();
if (!has_exist_in_array(paxos_server_list, server) && OB_FAIL(paxos_server_list.push_back(server))) {
LOG_WARN("fail to push back addr", K(ret), K(server));
}
}
}
}
return ret;
}
int ObDDLUtil::get_tablet_replica_location(
const uint64_t tenant_id,
const common::ObTabletID &tablet_id,
ObLSLocation &location)
{
int ret = OB_SUCCESS;
const int64_t cluster_id = GCONF.cluster_id;
share::ObLSID ls_id;
int64_t expire_renew_time = INT64_MAX;
bool is_cache_hit = false;
if (OB_UNLIKELY(nullptr == GCTX.location_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("location service ptr is null", K(ret));
} else if (tenant_id == OB_INVALID_TENANT_ID || !tablet_id.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get tablet replica location, invalid id", K(ret), K(tenant_id), K(tablet_id));
} else if (OB_FAIL(GCTX.location_service_->get(tenant_id,
tablet_id,
INT64_MAX,
is_cache_hit,
ls_id))) {
LOG_WARN("fail to get ls id according to tablet_id", K(ret), K(tenant_id), K(tablet_id));
} else if (OB_FAIL(GCTX.location_service_->get(cluster_id,
tenant_id,
ls_id,
expire_renew_time,
is_cache_hit,
location))) {
LOG_WARN("fail to get ls location", K(ret), K(cluster_id), K(tenant_id), K(ls_id), K(tablet_id));
}
return ret;
}
int ObDDLUtil::get_sys_ls_leader_addr(
const uint64_t cluster_id,
const uint64_t tenant_id,
common::ObAddr &leader_addr)
{
int ret = OB_SUCCESS;
bool force_renew = false;
share::ObLocationService *location_service = nullptr;
share::ObLSID ls_id(share::ObLSID::SYS_LS_ID);
if (OB_ISNULL(location_service = GCTX.location_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to check and wait old completement, null pointer. ", K(ret));
} else if (OB_FAIL(location_service->get_leader(cluster_id,
tenant_id,
ls_id,
force_renew,
leader_addr))) {
LOG_WARN("failed to get ls_leader", K(ret));
} else if (OB_UNLIKELY(!leader_addr.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("leader addr is invalid", K(ret), K(tenant_id), K(leader_addr), K(cluster_id));
} else {
LOG_INFO("succ to get ls leader addr", K(cluster_id), K(tenant_id), K(leader_addr));
}
return ret;
}
/****************** ObCheckTabletDataComplementOp *************/
int ObCheckTabletDataComplementOp::check_task_inner_sql_session_status(
const common::ObAddr &inner_sql_exec_addr,
const common::ObCurTraceId::TraceId &trace_id,
const uint64_t tenant_id,
const int64_t schema_version,
const int64_t scn,
bool &is_old_task_session_exist)
{
int ret = OB_SUCCESS;
is_old_task_session_exist = false;
char ip_str[common::OB_IP_STR_BUFF];
rootserver::ObRootService *root_service = nullptr;
if (OB_ISNULL(root_service = GCTX.root_service_)) {
ret = OB_ERR_SYS;
LOG_WARN("fail to get sql proxy, root service is null.!");
} else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || trace_id.is_invalid() || !inner_sql_exec_addr.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tenant_id), K(trace_id), K(inner_sql_exec_addr));
} else if (!inner_sql_exec_addr.ip_to_string(ip_str, sizeof(ip_str))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ip to string failed", K(ret), K(inner_sql_exec_addr));
} else {
ret = OB_SUCCESS;
common::ObMySQLProxy &proxy = root_service->get_sql_proxy();
ObSqlString sql_string;
SMART_VAR(ObMySQLProxy::MySQLResult, res) {
sqlclient::ObMySQLResult *result = NULL;
char trace_id_str[64] = { 0 };
char charater = '%';
if (OB_UNLIKELY(0 > trace_id.to_string(trace_id_str, sizeof(trace_id_str)))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get trace id string failed", K(ret), K(trace_id));
} else if (OB_FAIL(sql_string.assign_fmt(" SELECT id as session_id FROM %s WHERE trace_id = \"%s\" "
" and svr_ip = \"%s\" and svr_port = %d and info like \"%cINSERT%cINTO%cSELECT%c%ld%c%ld%c\" ",
OB_ALL_VIRTUAL_SESSION_INFO_TNAME,
trace_id_str,
ip_str,
inner_sql_exec_addr.get_port(),
charater,
charater,
charater,
charater,
schema_version,
charater,
scn,
charater ))) {
LOG_WARN("assign sql string failed", K(ret));
} else if (OB_FAIL(proxy.read(res, OB_SYS_TENANT_ID, sql_string.ptr(), &inner_sql_exec_addr))) {
LOG_WARN("query ddl task record failed", K(ret), K(sql_string));
} else if (OB_ISNULL((result = res.get_result()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get sql result", K(ret), KP(result));
} else {
uint64_t session_id = 0;
while (OB_SUCC(ret)) {
if (OB_FAIL(result->next())) {
if (OB_ITER_END == ret) {
LOG_INFO("success to get result, and no inner sql task", K(ret), K(sql_string.ptr()),
K(ip_str), K(trace_id_str), K(tenant_id), K(sql_string));
ret = OB_SUCCESS;
break;
} else {
LOG_WARN("fail to get next row", K(ret));
}
} else {
is_old_task_session_exist = true;
EXTRACT_UINT_FIELD_MYSQL(*result, "session_id", session_id, uint64_t);
LOG_INFO("succ to match inner sql session in trace id", K(ret), K(sql_string.ptr()),
K(session_id), K(tenant_id), K(ip_str), K(trace_id_str), K(sql_string));
}
}
}
}
}
return ret;
}
int ObCheckTabletDataComplementOp::update_replica_merge_status(
const ObTabletID &tablet_id,
const int merge_status,
hash::ObHashMap<ObTabletID, int32_t> &tablets_commited_map)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!tablet_id.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("update replica merge status fail.", K(ret));
} else {
int32_t commited_count = 0;
if (OB_SUCC(tablets_commited_map.get_refactored(tablet_id, commited_count))) {
// overwrite
if (true == merge_status) {
commited_count++;
if (OB_FAIL(tablets_commited_map.set_refactored(tablet_id, commited_count, true /* overwrite */))) {
LOG_WARN("fail to insert map status", K(ret));
}
}
} else if (OB_HASH_NOT_EXIST == ret) { // new insert
ret = OB_SUCCESS;
if (true == merge_status) {
commited_count = 1;
if (OB_FAIL(tablets_commited_map.set_refactored(tablet_id, commited_count, true /* overwrite */))) {
LOG_WARN("fail to insert map status", K(ret));
}
}
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to update replica merge status", K(ret));
}
LOG_INFO("success to update replica merge status.", K(tablet_id), K(merge_status));
}
return ret;
}
// only get un-merge tablet replica ip addr
int ObCheckTabletDataComplementOp::construct_tablet_ip_map(
const uint64_t tenant_id,
const ObTabletID &tablet_id,
hash::ObHashMap<ObAddr, ObArray<ObTabletID>> &ip_tablets_map)
{
int ret = OB_SUCCESS;
common::ObArray<ObAddr> paxos_server_list;
common::ObArray<ObAddr> unfinished_replica_addrs;
common::ObArray<ObTabletID> tablet_array;
int64_t paxos_member_count; // unused, but need
if (!tablet_id.is_valid() || OB_INVALID_TENANT_ID == tenant_id) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tablet_id), K(tenant_id));
} else if (OB_FAIL(ObDDLUtil::get_tablet_paxos_member_list(tenant_id, tablet_id, paxos_server_list, paxos_member_count))) {
LOG_WARN("fail to get tablet replica location!", K(ret), K(tablet_id));
} else {
// classify un-merge tablet addr and tablet ids
for (int64_t i = 0; OB_SUCC(ret) && i < paxos_server_list.count(); ++i) {
tablet_array.reset();
const ObAddr & addr = paxos_server_list.at(i);
if (OB_FAIL(ip_tablets_map.get_refactored(addr, tablet_array))) {
if (OB_HASH_NOT_EXIST == ret) { // first time
ret = OB_SUCCESS;
if (OB_FAIL(tablet_array.push_back(tablet_id))) {
LOG_WARN("fail to push back to array", K(ret), K(tablet_id));
} else if (OB_FAIL(ip_tablets_map.set_refactored(addr, tablet_array, true/* overwrite */))) {
LOG_WARN("set ip tablet map fail.", K(ret), K(tablet_id), K(addr));
}
} else {
LOG_WARN("get ip tablet from map fail.", K(ret), K(tablet_id), K(addr));
}
} else if (OB_FAIL(tablet_array.push_back(tablet_id))) {
LOG_WARN("fail to push back to array", K(ret), K(tablet_id));
} else if (OB_FAIL(ip_tablets_map.set_refactored(addr, tablet_array, true/* overwrite */))) {
LOG_WARN("set ip tablet map fail.", K(ret), K(tablet_id), K(addr));
}
}
}
return ret;
}
int ObCheckTabletDataComplementOp::construct_ls_tablet_map(
const uint64_t tenant_id,
const common::ObTabletID &tablet_id,
hash::ObHashMap<ObLSID, ObArray<ObTabletID>> &ls_tablets_map)
{
int ret = OB_SUCCESS;
bool is_cache_hit = false;
share::ObLSID ls_id;
common::ObArray<ObTabletID> tablet_array;
if (!tablet_id.is_valid() || OB_INVALID_TENANT_ID == tenant_id) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tablet_id), K(tenant_id));
} else if (OB_FAIL(GCTX.location_service_->get(tenant_id,
tablet_id,
INT64_MAX,
is_cache_hit, /*is_cache_hit*/
ls_id))) {
LOG_WARN("fail to get ls id according to tablet_id", K(ret), K(tenant_id), K(tablet_id));
} else if (OB_FAIL(ls_tablets_map.get_refactored(ls_id, tablet_array))) {
if (OB_HASH_NOT_EXIST == ret) { // first time
ret = OB_SUCCESS;
if (OB_FAIL(tablet_array.push_back(tablet_id))) {
LOG_WARN("fail to push back to array", K(ret), K(tablet_id));
} else if (OB_FAIL(ls_tablets_map.set_refactored(ls_id, tablet_array, false))) {
LOG_WARN("ls_tablets_map set fail", K(ret), K(tablet_id), K(ls_id));
}
} else {
LOG_WARN("ls_tablets_map get fail", K(ret), K(tablet_id), K(ls_id));
}
} else if (OB_FAIL(tablet_array.push_back(tablet_id))) {
LOG_WARN("fail to push back to array", K(ret), K(tablet_id));
} else if (OB_FAIL(ls_tablets_map.set_refactored(ls_id, tablet_array, true /* overwrite */))) {
LOG_WARN("ls_tablets_map set fail", K(ret), K(tablet_id), K(ls_id));
}
return ret;
}
int ObCheckTabletDataComplementOp::calculate_build_finish(
const uint64_t tenant_id,
const common::ObIArray<common::ObTabletID> &tablet_ids,
hash::ObHashMap<ObTabletID, int32_t> &tablets_commited_map,
int64_t &build_succ_count)
{
int ret = OB_SUCCESS;
common::ObArray<common::ObAddr> paxos_server_list; // unused
int64_t paxos_member_count = 0;
build_succ_count = 0;
if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("fail to check tablets commit status", K(ret), K(tenant_id));
} else if (tablets_commited_map.size() <= 0) {
// do nothing
} else {
int commited_count = 0;
for (int64_t tablet_idx = 0; OB_SUCC(ret) && tablet_idx < tablet_ids.count(); ++tablet_idx) {
common::ObTabletID tablet_id = tablet_ids.at(tablet_idx);
if (OB_FAIL(ObDDLUtil::get_tablet_paxos_member_list(tenant_id,
tablet_id,
paxos_server_list,
paxos_member_count))) {
LOG_WARN("fail to get tablet paxos member list.",
K(ret), K(tenant_id), K(tablet_id), K(paxos_server_list), K(paxos_member_count));
} else if (paxos_member_count == 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to check task tablet, unexpected!",
K(ret), K(paxos_member_count), K(tablet_id), K(tenant_id));
} else if (OB_FAIL(tablets_commited_map.get_refactored(tablet_id, commited_count))){
LOG_WARN("fail to get tablet commited map, unexpected!", K(ret), K(tablet_id));
} else if (commited_count < ((paxos_member_count >> 1) + 1)) { // not finished majority
// do nothing
} else {
build_succ_count++;
}
}
LOG_INFO("succ check and commit count", K(build_succ_count));
}
return ret;
}
int ObCheckTabletDataComplementOp::do_check_tablets_merge_status(
const uint64_t tenant_id,
const int64_t snapshot_version,
const ObIArray<ObTabletID> &tablet_ids,
const ObLSID &ls_id,
hash::ObHashMap<ObAddr, ObArray<ObTabletID>> &ip_tablets_map,
hash::ObHashMap<ObTabletID, int32_t> &tablets_commited_map,
int64_t &tablet_build_succ_count)
{
int ret = OB_SUCCESS;
obrpc::ObSrvRpcProxy *rpc_proxy = GCTX.srv_rpc_proxy_;
ip_tablets_map.reuse();
tablets_commited_map.reuse();
tablet_build_succ_count = 0;
if (OB_UNLIKELY(tablet_ids.count() < 0 || OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_TIMESTAMP == snapshot_version) ||
OB_ISNULL(rpc_proxy)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tablet_ids.count()), K(tenant_id), K(snapshot_version), K(rpc_proxy));
} else {
rootserver::ObCheckTabletMergeStatusProxy proxy(*rpc_proxy,
&obrpc::ObSrvRpcProxy::check_ddl_tablet_merge_status);
obrpc::ObDDLCheckTabletMergeStatusArg arg;
const int64_t rpc_timeout = max(GCONF.rpc_timeout, 1000L * 1000L * 9L);
for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) {
const ObTabletID &tablet_id = tablet_ids.at(i);
if (OB_FAIL(construct_tablet_ip_map(tenant_id, tablet_id, ip_tablets_map))) {
LOG_WARN("fail to get tablet ip addr", K(ret), K(tablet_id));
}
}
// handle every addr tablet
for (hash::ObHashMap<ObAddr, ObArray<ObTabletID>>::const_iterator ip_iter = ip_tablets_map.begin();
ip_iter != ip_tablets_map.end() && OB_SUCC(ret); ++ip_iter) {
const ObAddr & dest_ip = ip_iter->first;
const ObArray<ObTabletID> &tablet_array = ip_iter->second;
if (OB_FAIL(arg.tablet_ids_.assign(tablet_array))) {
LOG_WARN("fail to get tablet ip addr", K(ret), K(tablet_array));
} else {
arg.tenant_id_ = tenant_id;
arg.ls_id_ = ls_id;
arg.snapshot_version_ = snapshot_version;
if (OB_FAIL(proxy.call(dest_ip, rpc_timeout, tenant_id, arg))) {
LOG_WARN("send rpc failed", K(ret), K(arg), K(dest_ip), K(tenant_id));
}
}
}
// handle batch result
int tmp_ret = OB_SUCCESS;
common::ObArray<int> return_ret_array;
if (OB_SUCCESS != (tmp_ret = proxy.wait_all(return_ret_array))) {
LOG_WARN("rpc proxy wait failed", K(tmp_ret));
ret = OB_SUCCESS == ret ? tmp_ret : ret;
} else if (return_ret_array.count() != ip_tablets_map.size()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("rpc proxy rsp size not equal to send size", K(ret),
K(return_ret_array.count()), K(ip_tablets_map.size()));
} else {
const ObIArray<const obrpc::ObDDLCheckTabletMergeStatusResult *> &result_array = proxy.get_results();
// 1. handle every ip addr result
for (int64_t i = 0; OB_SUCC(ret) && i < result_array.count(); i++) {
int return_ret = return_ret_array.at(i); // check return ret code
if (OB_SUCCESS == return_ret) {
const obrpc::ObDDLCheckTabletMergeStatusResult *cur_result = nullptr; // ip tablets status result
common::ObSArray<bool> tablet_rsp_array;
common::ObArray<ObTabletID> tablet_req_array;
const common::ObAddr &tablet_addr = proxy.get_dests().at(i); // get rpc dest addr
if (OB_ISNULL(cur_result = result_array.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("merge status result is null.", K(ret), K(cur_result));
} else if (FALSE_IT(tablet_rsp_array = cur_result->merge_status_)) {
} else if (OB_FAIL(ip_tablets_map.get_refactored(tablet_addr, tablet_req_array))) {
LOG_WARN("get from ip tablet map fail.", K(ret), K(tablet_addr));
} else if (tablet_req_array.count() != tablet_rsp_array.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet req count is not equal to tablet rsp count", K(ret), K(tablet_req_array), K(tablet_rsp_array));
} else {
// 2. handle every tablet status
for (int64_t idx = 0; OB_SUCC(ret) && idx < tablet_rsp_array.count(); ++idx) {
const common::ObTabletID &tablet_id = tablet_req_array.at(idx); // tablet id
const bool tablet_status = tablet_rsp_array.at(idx);
if (OB_FAIL(update_replica_merge_status(tablet_id, tablet_status, tablets_commited_map))) { // update tablet merge status from get
LOG_WARN("fail to update replica merge status", K(ret), K(tablet_id), K(tablet_addr));
} else {
LOG_INFO("succ to update replica merge status", K(tablet_addr), K(tablet_id), K(tablet_status));
}
}
}
} else {
LOG_WARN("rpc proxy return fail.", K(return_ret));
}
}
// 3. check any commit tablet
if (OB_SUCC(ret)) {
int64_t build_succ_count = 0;
if (OB_FAIL(calculate_build_finish(tenant_id, tablet_ids, tablets_commited_map, build_succ_count))) {
LOG_WARN("check and commit tbalets commit log fail.", K(ret), K(tablet_ids), K(build_succ_count));
} else {
DEBUG_SYNC(DDL_CHECK_TABLET_MERGE_STATUS);
tablet_build_succ_count += build_succ_count;
}
}
}
}
return ret;
}
int ObCheckTabletDataComplementOp::check_tablet_merge_status(
const uint64_t tenant_id,
const ObIArray<common::ObTabletID> &tablet_ids,
const int64_t snapshot_version,
bool &is_all_tablets_commited)
{
int ret = OB_SUCCESS;
is_all_tablets_commited = false;
hash::ObHashMap<ObAddr, ObArray<ObTabletID>> ip_tablets_map; // use for classify tablet replica addr
hash::ObHashMap<ObLSID, ObArray<ObTabletID>> ls_tablets_map; // use for classify tablet ls
hash::ObHashMap<ObTabletID, int32_t> tablets_commited_map;
const static int64_t max_map_hash_bucket = tablet_ids.count();
if (OB_UNLIKELY( tablet_ids.count() <= 0 || OB_INVALID_ID == tenant_id || OB_INVALID_TIMESTAMP == snapshot_version)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(tablet_ids.count()), K(tenant_id), K(snapshot_version));
} else if (OB_FAIL(ip_tablets_map.create(max_map_hash_bucket, "DdlTablet"))) {
LOG_WARN("fail to create ip_tablets_map", K(ret));
} else if (OB_FAIL(ls_tablets_map.create(max_map_hash_bucket, "DdlTablet"))) {
LOG_WARN("fail to create ls_tablets_map", K(ret));
} else if (OB_FAIL(tablets_commited_map.create(max_map_hash_bucket, "DdlTablet"))){
LOG_WARN("fail to create tablets_commited_map", K(ret));
} else {
const static int64_t batch_size = 100; // batch tablet number
int64_t total_build_succ_count = 0;
int64_t one_batch_build_succ_count = 0;
for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) {
const ObTabletID &tablet_id = tablet_ids.at(i);
if (construct_ls_tablet_map(tenant_id, tablet_id, ls_tablets_map)) {
LOG_WARN("construct_tablet_ls_map fail", K(ret), K(tenant_id), K(tablet_id));
} else {
if ((i != 0 && i % batch_size == 0) /* reach batch size */ || i == tablet_ids.count() - 1 /* reach end */) {
for (hash::ObHashMap<ObLSID, ObArray<ObTabletID>>::const_iterator ls_iter = ls_tablets_map.begin();
ls_iter != ls_tablets_map.end() && OB_SUCC(ret); ++ls_iter) {
const ObLSID &ls_id = ls_iter->first;
const ObArray<ObTabletID> &tablet_array = ls_iter->second;
if (OB_FAIL(do_check_tablets_merge_status(tenant_id,
snapshot_version,
tablet_array,
ls_id,
ip_tablets_map,
tablets_commited_map,
one_batch_build_succ_count))) {
LOG_WARN("do check tablets merge status fail", K(ret));
} else {
total_build_succ_count += one_batch_build_succ_count;
}
}
ls_tablets_map.reuse(); // reuse map
}
}
}
if (total_build_succ_count == tablet_ids.count()) {
is_all_tablets_commited = true;
} else {
int64_t total_tablets_count = tablet_ids.count();
LOG_WARN("not all tablets finished create sstables", K(ret), K(total_build_succ_count), K(total_tablets_count));
}
}
ip_tablets_map.destroy();
ls_tablets_map.destroy();
tablets_commited_map.destroy();
return ret;
}
/*
* 1. get a batch of tablets and construct a tmp ls_tablet_map
* 2. get tablet_ip_map and send async batch rpc and get results
* 3. push every tablet result to tablet_result_array
* 4. check result and find finished tablets
*/
int ObCheckTabletDataComplementOp::check_all_tablet_sstable_status(
const uint64_t tenant_id,
const uint64_t index_table_id,
const int64_t snapshot_version,
bool &is_all_sstable_build_finished)
{
int ret = OB_SUCCESS;
ObArray<ObTabletID> dest_tablet_ids;
is_all_sstable_build_finished = false;
if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == index_table_id || OB_INVALID_TIMESTAMP == snapshot_version)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(index_table_id), K(snapshot_version));
} else if (OB_FAIL(ObDDLUtil::get_tablets(tenant_id, index_table_id, dest_tablet_ids))) {
LOG_WARN("fail to get tablets", K(ret), K(tenant_id), K(index_table_id));
} else if (OB_FAIL(check_tablet_merge_status(tenant_id, dest_tablet_ids, snapshot_version, is_all_sstable_build_finished))){
LOG_WARN("fail to check tablet merge status.", K(ret), K(tenant_id), K(dest_tablet_ids), K(snapshot_version));
}
return ret;
}
/*
* This func is used to check duplicate data completement inner sql
* if has running inner sql, we should wait until finished. But
* if not has running inner sql, we should found if all tablet sstable
* has builded already. If not all builded and no inner sql running, or
* error case happen, we still execute new inner sql outside.
*/
int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
const uint64_t tenant_id,
const uint64_t table_id,
const common::ObAddr &inner_sql_exec_addr,
const common::ObCurTraceId::TraceId &trace_id,
const int64_t schema_version,
const int64_t scn,
bool &need_exec_new_inner_sql)
{
int ret = OB_SUCCESS;
need_exec_new_inner_sql = true; // default need execute new inner sql
bool is_old_task_session_exist = false;
bool is_all_sstable_build_finished = false;
bool need_wait = false;
if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id));
} else {
LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id));
do {
if (OB_FAIL(check_all_tablet_sstable_status(tenant_id,
table_id,
scn,
is_all_sstable_build_finished))) {
LOG_WARN("fail to check task tablet sstable status", K(ret), K(tenant_id), K(table_id), K(scn));
} else if (is_all_sstable_build_finished) {
LOG_INFO("all tablet sstable has build finished");
} else {
if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr,
trace_id,
tenant_id,
schema_version,
scn,
is_old_task_session_exist))) {
LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr));
} else if (!is_old_task_session_exist) {
LOG_WARN("old inner sql session is not exist.", K(ret));
} else {
usleep(10 * 1000); // sleep 10ms
}
}
need_wait = !is_all_sstable_build_finished && is_old_task_session_exist;
} while (OB_SUCC(ret) && need_wait); // TODO: time out
///// end
/* Only in table all sstables not finished case, we will do retry */
if (is_all_sstable_build_finished) {
need_exec_new_inner_sql = false;
LOG_INFO("no need to execute inner sql to do complement.", K(need_exec_new_inner_sql));
}
}
LOG_INFO("end to check and wait complement task", K(ret),
K(table_id), K(is_old_task_session_exist), K(is_all_sstable_build_finished), K(need_exec_new_inner_sql));
return ret;
}