diff --git a/be/src/common/status.h b/be/src/common/status.h index 3651576e11..b8c4194fb9 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -291,6 +291,15 @@ private: return ret_code; \ } \ } while (0); -} + +#define RETURN_NOT_OK_STATUS_WITH_WARN(stmt, warning_prefix) \ + do { \ + const Status& _s = (stmt); \ + if (UNLIKELY(!_s.ok())) { \ + LOG(WARNING) << (warning_prefix) << ", error: " << _s.to_string(); \ + return _s; \ + } \ + } while (0); +} // namespace doris #define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 1e8d5cca87..f577f973df 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -46,10 +46,13 @@ #include "olap/tablet_meta_manager.h" #include "olap/utils.h" // for check_dir_existed #include "service/backend_options.h" +#include "util/errno.h" #include "util/file_utils.h" #include "util/monotime.h" #include "util/string_util.h" +using strings::Substitute; + namespace doris { static const char* const kMtabPath = "/etc/mtab"; @@ -78,20 +81,21 @@ DataDir::~DataDir() { Status DataDir::init() { if (!FileUtils::check_exist(_path)) { - LOG(WARNING) << "opendir failed, path=" << _path; - return Status::InternalError("opendir failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError(Substitute("opendir failed, path=$0", _path)), + "check file exist failed"); } std::string align_tag_path = _path + ALIGN_TAG_PREFIX; if (access(align_tag_path.c_str(), F_OK) == 0) { - LOG(WARNING) << "align tag was found, path=" << _path; - return Status::InternalError("invalid root path: "); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::NotFound(Substitute("align tag $0 was found", align_tag_path)), + "access file failed"); } - RETURN_IF_ERROR(update_capacity()); - RETURN_IF_ERROR(_init_cluster_id()); - RETURN_IF_ERROR(_init_capacity()); - RETURN_IF_ERROR(_init_file_system()); - RETURN_IF_ERROR(_init_meta()); + RETURN_NOT_OK_STATUS_WITH_WARN(update_capacity(), 
"update_capacity failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_cluster_id(), "_init_cluster_id failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_capacity(), "_init_capacity failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_file_system(), "_init_file_system failed"); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_meta(), "_init_meta failed"); _is_used = true; return Status::OK(); @@ -108,10 +112,10 @@ Status DataDir::_init_cluster_id() { int fd = open(cluster_id_path.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (fd < 0 || close(fd) < 0) { - char errmsg[64]; - LOG(WARNING) << "fail to create file. [path='" << cluster_id_path << "' err='" - << strerror_r(errno, errmsg, 64) << "']"; - return Status::InternalError("invalid store path: create cluster id failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("failed to create cluster id file $0, err=$1", + cluster_id_path, errno_to_string(errno))), + "create file failed"); } } @@ -119,16 +123,18 @@ Status DataDir::_init_cluster_id() { FILE* fp = NULL; fp = fopen(cluster_id_path.c_str(), "r+b"); if (fp == NULL) { - LOG(WARNING) << "fail to open cluster id path. path=" << cluster_id_path; - return Status::InternalError("invalid store path: open cluster id failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("failed to open cluster id file $0", cluster_id_path)), + "open file failed"); } int lock_res = flock(fp->_fileno, LOCK_EX | LOCK_NB); if (lock_res < 0) { - LOG(WARNING) << "fail to lock file descriptor. 
path=" << cluster_id_path; fclose(fp); fp = NULL; - return Status::InternalError("invalid store path: flock cluster id failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("failed to flock cluster id file $0", cluster_id_path)), + "flock file failed"); } // obtain cluster id of all root paths @@ -137,13 +143,14 @@ Status DataDir::_init_cluster_id() { return st; } -Status DataDir::_read_cluster_id(const std::string& path, int32_t* cluster_id) { +Status DataDir::_read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id) { int32_t tmp_cluster_id = -1; - std::fstream fs(path.c_str(), std::fstream::in); + std::fstream fs(cluster_id_path.c_str(), std::fstream::in); if (!fs.is_open()) { - LOG(WARNING) << "fail to open cluster id path. [path='" << path << "']"; - return Status::InternalError("open file failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("failed to open cluster id file $0", cluster_id_path)), + "open file failed"); } fs >> tmp_cluster_id; @@ -154,12 +161,12 @@ Status DataDir::_read_cluster_id(const std::string& path, int32_t* cluster_id) { } else if (tmp_cluster_id >= 0 && (fs.rdstate() & std::fstream::eofbit) != 0) { *cluster_id = tmp_cluster_id; } else { - OLAP_LOG_WARNING( - "fail to read cluster id from file. " - "[id=%d eofbit=%d failbit=%d badbit=%d]", - tmp_cluster_id, fs.rdstate() & std::fstream::eofbit, - fs.rdstate() & std::fstream::failbit, fs.rdstate() & std::fstream::badbit); - return Status::InternalError("cluster id file corrupt"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::Corruption( + Substitute("cluster id file $0 is corrupt. 
[id=$1 eofbit=$2 failbit=$3 badbit=$4]", + cluster_id_path, tmp_cluster_id, fs.rdstate() & std::fstream::eofbit, + fs.rdstate() & std::fstream::failbit, fs.rdstate() & std::fstream::badbit)), + "file content is error"); } return Status::OK(); } @@ -170,16 +177,18 @@ Status DataDir::_init_capacity() { if (_capacity_bytes == -1) { _capacity_bytes = disk_capacity; } else if (_capacity_bytes > disk_capacity) { - LOG(WARNING) << "root path capacity should not larger than disk capacity. " - << "path=" << _path << ", capacity_bytes=" << _capacity_bytes - << ", disk_capacity=" << disk_capacity; - return Status::InternalError("invalid store path: invalid capacity"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::InvalidArgument( + Substitute("root path $0's capacity $1 should not larger than disk capacity $2", + _path, _capacity_bytes, disk_capacity)), + "init capacity failed"); } std::string data_path = _path + DATA_PREFIX; if (!FileUtils::check_exist(data_path) && !FileUtils::create_dir(data_path).ok()) { - LOG(WARNING) << "failed to create data root path. 
path=" << data_path; - return Status::InternalError("invalid store path: failed to create data directory"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("failed to create data root path $0", data_path)), + "check_exist failed"); } return Status::OK(); @@ -188,10 +197,10 @@ Status DataDir::_init_capacity() { Status DataDir::_init_file_system() { struct stat s; if (stat(_path.c_str(), &s) != 0) { - char errmsg[64]; - LOG(WARNING) << "stat failed, path=" << _path << ", errno=" << errno - << ", errmsg=" << strerror_r(errno, errmsg, 64); - return Status::InternalError("invalid store path: stat failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError( + Substitute("stat file $0 failed, err=$1", _path, errno_to_string(errno))), + "stat file failed"); } dev_t mount_device; @@ -203,10 +212,10 @@ Status DataDir::_init_file_system() { FILE* mount_tablet = nullptr; if ((mount_tablet = setmntent(kMtabPath, "r")) == NULL) { - char errmsg[64]; - LOG(WARNING) << "setmntent failed, path=" << kMtabPath << ", errno=" << errno - << ", errmsg=" << strerror_r(errno, errmsg, 64); - return Status::InternalError("invalid store path: setmntent failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError( + Substitute("setmntent file $0 failed, err=$1", kMtabPath, errno_to_string(errno))), + "setmntent file failed"); } bool is_find = false; @@ -234,8 +243,9 @@ Status DataDir::_init_file_system() { endmntent(mount_tablet); if (!is_find) { - LOG(WARNING) << "fail to find file system, path=" << _path; - return Status::InternalError("invalid store path: find file system failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("file system $0 not found", _path)), + "find file system failed"); } _file_system = mount_entry->mnt_fsname; @@ -251,13 +261,15 @@ Status DataDir::_init_meta() { // init meta _meta = new (std::nothrow) OlapMeta(_path); if (_meta == nullptr) { - LOG(WARNING) << "new olap meta failed"; - return Status::InternalError("new olap meta failed"); 
+ RETURN_NOT_OK_STATUS_WITH_WARN( + Status::MemoryAllocFailed("allocate memory for OlapMeta failed"), + "new OlapMeta failed"); } OLAPStatus res = _meta->init(); if (res != OLAP_SUCCESS) { - LOG(WARNING) << "init meta failed"; - return Status::InternalError("init meta failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError(Substitute("open rocksdb failed, path=$0", _path)), + "init OlapMeta failed"); } return Status::OK(); } @@ -344,13 +356,12 @@ void DataDir::clear_tablets(std::vector* tablet_infos) { } std::string DataDir::get_absolute_shard_path(int64_t shard_id) { - return strings::Substitute("$0$1/$2", _path, DATA_PREFIX, shard_id); + return Substitute("$0$1/$2", _path, DATA_PREFIX, shard_id); } std::string DataDir::get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, int32_t schema_hash) { - return strings::Substitute("$0/$1/$2", get_absolute_shard_path(shard_id), tablet_id, - schema_hash); + return Substitute("$0/$1/$2", get_absolute_shard_path(shard_id), tablet_id, schema_hash); } void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector* paths) { @@ -874,8 +885,10 @@ Status DataDir::update_capacity() { _disk_capacity_bytes = path_info.capacity; } } catch (boost::filesystem::filesystem_error& e) { - LOG(WARNING) << "get space info failed. 
path: " << _path << " erro:" << e.what(); - return Status::InternalError("get path available capacity failed"); + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::IOError( + Substitute("get path $0 available capacity failed, error=$1", _path, e.what())), + "boost::filesystem::space failed"); } LOG(INFO) << "path: " << _path << " total capacity: " << _disk_capacity_bytes << ", available capacity: " << _available_bytes; diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h index 7456c170da..58992ed2f1 100644 --- a/be/src/olap/data_dir.h +++ b/be/src/olap/data_dir.h @@ -130,7 +130,7 @@ private: Status _check_disk(); OLAPStatus _read_and_write_test_file(); - Status _read_cluster_id(const std::string& path, int32_t* cluster_id); + Status _read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id); Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id); OLAPStatus _clean_unfinished_converting_data(); OLAPStatus _convert_old_tablet(); diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index da17bdbcef..b3c136b1dd 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -128,7 +128,6 @@ enum OLAPStatus { OLAP_ERR_EVAL_CONJUNCTS_ERROR = -120, OLAP_ERR_COPY_FILE_ERROR = -121, OLAP_ERR_FILE_ALREADY_EXIST = -122, - OLAP_ERR_TOO_FEW_FILE_DESCRITPROR = -123, // common errors codes // [-200, -300) diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 3c210f5dd1..c7b433fcc6 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -76,6 +76,7 @@ using std::set_difference; using std::string; using std::stringstream; using std::vector; +using strings::Substitute; namespace doris { @@ -92,11 +93,7 @@ Status StorageEngine::open(const EngineOptions& options, StorageEngine** engine_ RETURN_IF_ERROR(_validate_options(options)); LOG(INFO) << "starting backend using uid:" << options.backend_uid.to_string(); std::unique_ptr engine(new StorageEngine(options)); - auto 
st = engine->_open(); - if (st != OLAP_SUCCESS) { - LOG(WARNING) << "engine open failed, res=" << st; - return Status::InternalError("open engine failed"); - } + RETURN_NOT_OK_STATUS_WITH_WARN(engine->_open(), "open engine failed"); *engine_ptr = engine.release(); LOG(INFO) << "success to init storage engine."; return Status::OK(); @@ -145,16 +142,16 @@ void StorageEngine::load_data_dirs(const std::vector& data_dirs) { } } -OLAPStatus StorageEngine::_open() { +Status StorageEngine::_open() { // init store_map - RETURN_NOT_OK(_init_store_map()); + RETURN_NOT_OK_STATUS_WITH_WARN(_init_store_map(), "_init_store_map failed"); _effective_cluster_id = config::cluster_id; - RETURN_NOT_OK_LOG(_check_all_root_path_cluster_id(), "fail to check cluster info."); + RETURN_NOT_OK_STATUS_WITH_WARN(_check_all_root_path_cluster_id(), "fail to check cluster id"); _update_storage_medium_type_count(); - RETURN_NOT_OK(_check_file_descriptor_number()); + RETURN_NOT_OK_STATUS_WITH_WARN(_check_file_descriptor_number(), "check fd number failed"); _index_stream_lru_cache = new_lru_cache(config::index_stream_cache_capacity); @@ -170,22 +167,26 @@ OLAPStatus StorageEngine::_open() { _parse_default_rowset_type(); - return OLAP_SUCCESS; + return Status::OK(); } -OLAPStatus StorageEngine::_init_store_map() { +Status StorageEngine::_init_store_map() { std::vector tmp_stores; std::vector threads; - std::atomic init_error{false}; + SpinLock error_msg_lock; + std::string error_msg; for (auto& path : _options.store_paths) { DataDir* store = new DataDir(path.path, path.capacity_bytes, path.storage_medium, _tablet_manager.get(), _txn_manager.get()); tmp_stores.emplace_back(store); - threads.emplace_back([store, &init_error]() { + threads.emplace_back([store, &error_msg_lock, &error_msg]() { auto st = store->init(); if (!st.ok()) { - init_error = true; - LOG(WARNING) << "Store load failed, status="<< st.to_string() << ", path=" << store->path(); + { + std::lock_guard l(error_msg_lock); + 
error_msg.append(st.to_string() + ";"); + } + LOG(WARNING) << "Store load failed, status=" << st.to_string() << ", path=" << store->path(); } }); } @@ -193,17 +194,17 @@ OLAPStatus StorageEngine::_init_store_map() { thread.join(); } - if (init_error) { + if (!error_msg.empty()) { for (auto store : tmp_stores) { delete store; } - return OLAP_ERR_INVALID_ROOT_PATH; + return Status::InternalError(Substitute("init path failed, error=$0", error_msg)); } for (auto store : tmp_stores) { _store_map.emplace(store->path(), store); } - return OLAP_SUCCESS; + return Status::OK(); } void StorageEngine::_update_storage_medium_type_count() { @@ -219,27 +220,26 @@ void StorageEngine::_update_storage_medium_type_count() { _available_storage_medium_type_count = available_storage_medium_types.size(); } -OLAPStatus StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { - OLAPStatus res = OLAP_SUCCESS; - +Status StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { if (cluster_id == -1 && _effective_cluster_id == -1) { // maybe this is a new cluster, cluster id will get from heartbeat message - return res; + return Status::OK(); } else if (cluster_id != -1 && _effective_cluster_id == -1) { _effective_cluster_id = cluster_id; - return res; + return Status::OK(); } else if (cluster_id == -1 && _effective_cluster_id != -1) { // _effective_cluster_id is the right effective cluster id - return res; + return Status::OK(); } else { if (cluster_id != _effective_cluster_id) { - LOG(WARNING) << "multiple cluster ids is not equal. id1=" << _effective_cluster_id - << " id2=" << cluster_id; - return OLAP_ERR_INVALID_CLUSTER_INFO; + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::Corruption(Substitute("multiple cluster ids is not equal. 
one=$0, other=$1", + _effective_cluster_id, cluster_id)), + "cluster id not equal"); } } - return res; + return Status::OK(); } void StorageEngine::set_store_used_flag(const string& path, bool is_used) { @@ -329,24 +329,24 @@ void StorageEngine::_start_disk_stat_monitor() { } // TODO(lingbin): Should be in EnvPosix? -OLAPStatus StorageEngine::_check_file_descriptor_number() { +Status StorageEngine::_check_file_descriptor_number() { struct rlimit l; int ret = getrlimit(RLIMIT_NOFILE , &l); if (ret != 0) { LOG(WARNING) << "call getrlimit() failed. errno=" << strerror(errno) << ", use default configuration instead."; - return OLAP_SUCCESS; + return Status::OK(); } if (l.rlim_cur < config::min_file_descriptor_number) { LOG(ERROR) << "File descriptor number is less than " << config::min_file_descriptor_number << ". Please use (ulimit -n) to set a value equal or greater than " << config::min_file_descriptor_number; - return OLAP_ERR_TOO_FEW_FILE_DESCRITPROR; + return Status::InternalError("file descriptors limit is too small"); } - return OLAP_SUCCESS; + return Status::OK(); } -OLAPStatus StorageEngine::_check_all_root_path_cluster_id() { +Status StorageEngine::_check_all_root_path_cluster_id() { int32_t cluster_id = -1; for (auto& it : _store_map) { int32_t tmp_cluster_id = it.second->cluster_id(); @@ -357,22 +357,22 @@ OLAPStatus StorageEngine::_check_all_root_path_cluster_id() { } else if (cluster_id == -1) { cluster_id = tmp_cluster_id; } else { - LOG(WARNING) << "multiple cluster ids is not equal. one=" << cluster_id - << ", other=" << tmp_cluster_id; - return OLAP_ERR_INVALID_CLUSTER_INFO; + RETURN_NOT_OK_STATUS_WITH_WARN( + Status::Corruption(Substitute("multiple cluster ids is not equal. 
one=$0, other=$1", + cluster_id, tmp_cluster_id)), + "cluster id not equal"); } } // judge and get effective cluster id - OLAPStatus res = OLAP_SUCCESS; - RETURN_NOT_OK(_judge_and_update_effective_cluster_id(cluster_id)); + RETURN_IF_ERROR(_judge_and_update_effective_cluster_id(cluster_id)); // write cluster id into cluster_id_path if get effective cluster id success if (_effective_cluster_id != -1 && !_is_all_cluster_id_exist) { set_cluster_id(_effective_cluster_id); } - return res; + return Status::OK(); } Status StorageEngine::set_cluster_id(int32_t cluster_id) { diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index c1768e2ac8..647bde8ab6 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -197,19 +197,19 @@ public: private: // Instance should be inited from `static open()` // MUST NOT be called in other circumstances. - OLAPStatus _open(); + Status _open(); // Clear status(tables, ...) void _clear(); - OLAPStatus _init_store_map(); + Status _init_store_map(); void _update_storage_medium_type_count(); // Some check methods - OLAPStatus _check_file_descriptor_number(); - OLAPStatus _check_all_root_path_cluster_id(); - OLAPStatus _judge_and_update_effective_cluster_id(int32_t cluster_id); + Status _check_file_descriptor_number(); + Status _check_all_root_path_cluster_id(); + Status _judge_and_update_effective_cluster_id(int32_t cluster_id); bool _delete_tablets_on_unused_root_path(); diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index 1ee19ca276..9aa8722fac 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -61,7 +61,8 @@ void set_up() { doris::EngineOptions options; options.store_paths = paths; - doris::StorageEngine::open(options, &k_engine); + Status s = doris::StorageEngine::open(options, &k_engine); + ASSERT_TRUE(s.ok()) << s.to_string(); } void tear_down() { diff --git a/be/test/olap/delta_writer_test.cpp 
b/be/test/olap/delta_writer_test.cpp index ac52ea9ef3..a14c74d2f8 100644 --- a/be/test/olap/delta_writer_test.cpp +++ b/be/test/olap/delta_writer_test.cpp @@ -60,7 +60,8 @@ void set_up() { doris::EngineOptions options; options.store_paths = paths; - doris::StorageEngine::open(options, &k_engine); + Status s = doris::StorageEngine::open(options, &k_engine); + ASSERT_TRUE(s.ok()) << s.to_string(); ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(k_engine); diff --git a/be/test/olap/memtable_flush_executor_test.cpp b/be/test/olap/memtable_flush_executor_test.cpp index b7408de615..d44850dab7 100644 --- a/be/test/olap/memtable_flush_executor_test.cpp +++ b/be/test/olap/memtable_flush_executor_test.cpp @@ -55,7 +55,8 @@ void set_up() { doris::EngineOptions options; options.store_paths = paths; - doris::StorageEngine::open(options, &k_engine); + Status s = doris::StorageEngine::open(options, &k_engine); + ASSERT_TRUE(s.ok()) << s.to_string(); ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(k_engine); diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp index 1c7520221f..f625512839 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -64,7 +64,8 @@ protected: doris::EngineOptions options; options.store_paths = paths; - doris::StorageEngine::open(options, &k_engine); + Status s = doris::StorageEngine::open(options, &k_engine); + ASSERT_TRUE(s.ok()) << s.to_string(); ExecEnv* exec_env = doris::ExecEnv::GetInstance(); exec_env->set_storage_engine(k_engine); diff --git a/be/test/olap/rowset/rowset_converter_test.cpp b/be/test/olap/rowset/rowset_converter_test.cpp index b7a34e18e8..5ef600cd2e 100644 --- a/be/test/olap/rowset/rowset_converter_test.cpp +++ b/be/test/olap/rowset/rowset_converter_test.cpp @@ -165,7 +165,8 @@ public: doris::EngineOptions options; options.store_paths = paths; if (k_engine == nullptr) { - 
doris::StorageEngine::open(options, &k_engine); + Status s = doris::StorageEngine::open(options, &k_engine); + ASSERT_TRUE(s.ok()) << s.to_string(); } ExecEnv* exec_env = doris::ExecEnv::GetInstance();