From 273aad6cf4deee4212e91c6e18310f298610fb80 Mon Sep 17 00:00:00 2001 From: Dayue Gao Date: Fri, 15 May 2020 10:11:17 +0800 Subject: [PATCH] [Bug] Restore tablet action not working because tablet status is shutdown (#3551) --- be/src/http/action/restore_tablet_action.cpp | 2 +- be/src/olap/storage_engine.cpp | 5 +++-- be/src/olap/storage_engine.h | 3 ++- be/src/olap/tablet_manager.cpp | 16 ++++++++++++---- be/src/olap/tablet_manager.h | 9 +++++++-- 5 files changed, 25 insertions(+), 10 deletions(-) diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 53f9f177bf..54704a36f9 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -117,7 +117,7 @@ Status RestoreTabletAction::_reload_tablet(const std::string& key, const std::st clone_req.__set_tablet_id(tablet_id); clone_req.__set_schema_hash(schema_hash); OLAPStatus res = OLAPStatus::OLAP_SUCCESS; - res = _exec_env->storage_engine()->load_header(shard_path, clone_req); + res = _exec_env->storage_engine()->load_header(shard_path, clone_req, /*restore=*/true); if (res != OLAPStatus::OLAP_SUCCESS) { LOG(WARNING) << "load header failed. status: " << res << ", signature: " << tablet_id; // remove tablet data path in data path diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index cabf6b57d8..3c210f5dd1 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -824,7 +824,8 @@ OLAPStatus StorageEngine::obtain_shard_path( OLAPStatus StorageEngine::load_header( const string& shard_path, - const TCloneReq& request) { + const TCloneReq& request, + bool restore) { LOG(INFO) << "begin to process load headers." << "tablet_id=" << request.tablet_id << ", schema_hash=" << request.schema_hash; @@ -864,7 +865,7 @@ OLAPStatus StorageEngine::load_header( res = _tablet_manager->load_tablet_from_dir( store, request.tablet_id, request.schema_hash, - schema_hash_path_stream.str(), false); + schema_hash_path_stream.str(), false, restore); if (res != OLAP_SUCCESS) { LOG(WARNING) << "fail to process load headers. res=" << res; return res; diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index a29f1c12fd..c1768e2ac8 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -129,8 +129,9 @@ public: // // @param [in] root_path specify root path of new tablet // @param [in] request specify new tablet info + // @param [in] restore whether we're restoring a tablet from trash // @return OLAP_SUCCESS if load tablet success - OLAPStatus load_header(const std::string& shard_path, const TCloneReq& request); + OLAPStatus load_header(const std::string& shard_path, const TCloneReq& request, bool restore = false); // To trigger a disk-stat and tablet report void trigger_report() { diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 24244bc4ee..c5693dd138 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -771,7 +771,7 @@ TabletSharedPtr TabletManager::find_best_tablet_to_compaction(CompactionType com } OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id, - TSchemaHash schema_hash, const string& meta_binary, bool update_meta, bool force) { + TSchemaHash schema_hash, const string& meta_binary, bool update_meta, bool force, bool restore) { RWMutex& tablet_map_lock = _get_tablet_map_lock(tablet_id); WriteLock wlock(&tablet_map_lock); TabletMetaSharedPtr tablet_meta(new TabletMeta()); @@ -797,6 +797,11 @@ OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tab return OLAP_ERR_HEADER_PB_PARSE_FAILED; } + if (restore) { + // we're restoring tablet from trash, tablet state should be changed from shutdown back to running + tablet_meta->set_tablet_state(TABLET_RUNNING); + } + TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir); if (tablet == nullptr) { LOG(WARNING) << "fail to load tablet. tablet_id=" << tablet_id @@ -833,11 +838,14 @@ OLAPStatus TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tab OLAPStatus TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id, SchemaHash schema_hash, const string& schema_hash_path, - bool force) { + bool force, + bool restore) { LOG(INFO) << "begin to load tablet from dir. " << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash - << " path = " << schema_hash_path; + << " path = " << schema_hash_path + << " force = " << force + << " restore = " << restore; // not add lock here, because load_tablet_from_meta already add lock string header_path = TabletMeta::construct_header_file_path(schema_hash_path, tablet_id); // should change shard id before load tablet @@ -865,7 +873,7 @@ OLAPStatus TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_ string meta_binary; tablet_meta->serialize(&meta_binary); RETURN_NOT_OK_LOG(load_tablet_from_meta(store, tablet_id, schema_hash, - meta_binary, true, force), + meta_binary, true, force, restore), Substitute("fail to load tablet. header_path=$0", header_path)); return OLAP_SUCCESS; diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index b47be431f7..347fb1071e 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -96,15 +96,20 @@ public: void get_tablet_stat(TTabletStatResult* result); // parse tablet header msg to generate tablet object + // - restore: whether the request is from restore tablet action, + // where we should change tablet status from shutdown back to running OLAPStatus load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id, TSchemaHash schema_hash, const std::string& header, - bool update_meta, bool force = false); + bool update_meta, + bool force = false, + bool restore = false); OLAPStatus load_tablet_from_dir(DataDir* data_dir, TTabletId tablet_id, SchemaHash schema_hash, const std::string& schema_hash_path, - bool force = false); + bool force = false, + bool restore = false); void release_schema_change_lock(TTabletId tablet_id);