diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index db3bcad5f9..be363346e2 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -29,10 +29,12 @@ #include #include -#include "boost/filesystem.hpp" -#include "boost/lexical_cast.hpp" +#include +#include + #include "agent/status.h" #include "agent/utils.h" +#include "env/env.h" #include "gen_cpp/FrontendService.h" #include "gen_cpp/Types_types.h" #include "http/http_client.h" @@ -1552,7 +1554,7 @@ void* TaskWorkerPool::_make_snapshot_thread_callback(void* arg_this) { std::stringstream ss; ss << snapshot_path << "/" << snapshot_request.tablet_id << "/" << snapshot_request.schema_hash << "/"; - Status st = FileUtils::scan_dir(ss.str(), &snapshot_files); + Status st = FileUtils::list_files(Env::Default(), ss.str(), &snapshot_files); if (!st.ok()) { status_code = TStatusCode::RUNTIME_ERROR; OLAP_LOG_WARNING("make_snapshot failed. tablet_id: %ld, schema_hash: %ld, version: %d," diff --git a/be/src/common/status.h b/be/src/common/status.h index 655b3c88ee..e095f886cf 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -129,6 +129,8 @@ public: bool is_thrift_rpc_error() const { return code() == TStatusCode::THRIFT_RPC_ERROR; } bool is_end_of_file() const { return code() == TStatusCode::END_OF_FILE; } + + bool is_not_found() const { return code() == TStatusCode::NOT_FOUND; } // Convert into TStatus. Call this if 'status_container' contains an optional // TStatus field named 'status'. This also sets __isset.status. template diff --git a/be/src/env/env.h b/be/src/env/env.h index 338fb08024..704c86066e 100644 --- a/be/src/env/env.h +++ b/be/src/env/env.h @@ -118,6 +118,22 @@ public: virtual Status get_children(const std::string& dir, std::vector* result) = 0; + // Iterate the specified directory and call given callback function with child's + // name. This function continues execution until all children have been iterated + // or callback function return false. + // The names are relative to "dir". + // + // The function call extra cost is acceptable. Compared with returning all children + // into a given vector, the performance of this method is 5% worse. However this + // approach is more flexiable and efficient in fulfilling other requirements. + // + // Returns OK if "dir" exists. + // NotFound if "dir" does not exist, the calling process does not have + // permission to access "dir", or if "dir" is invalid. + // IOError if an IO Error was encountered + virtual Status iterate_dir(const std::string& dir, + const std::function& cb) = 0; + // Delete the named file. virtual Status delete_file(const std::string& fname) = 0; diff --git a/be/src/env/env_posix.cpp b/be/src/env/env_posix.cpp index 43500163f5..4bb087b16a 100644 --- a/be/src/env/env_posix.cpp +++ b/be/src/env/env_posix.cpp @@ -565,6 +565,23 @@ public: return Status::OK(); } + Status iterate_dir(const std::string& dir, + const std::function& cb) override { + DIR* d = opendir(dir.c_str()); + if (d == nullptr) { + return io_error(dir, errno); + } + struct dirent* entry; + while ((entry = readdir(d)) != nullptr) { + // callback returning false means to terminate iteration + if (!cb(entry->d_name)) { + break; + } + } + closedir(d); + return Status::OK(); + } + Status delete_file(const std::string& fname) override { if (unlink(fname.c_str()) != 0) { return io_error(fname, errno); diff --git a/be/src/http/action/restore_tablet_action.cpp b/be/src/http/action/restore_tablet_action.cpp index 6d0e1a740c..bb24881889 100644 --- a/be/src/http/action/restore_tablet_action.cpp +++ b/be/src/http/action/restore_tablet_action.cpp @@ -24,6 +24,7 @@ #include "boost/lexical_cast.hpp" #include "agent/cgroups_mgr.h" +#include "env/env.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" @@ -189,7 +190,7 @@ Status RestoreTabletAction::_restore(const std::string& key, int64_t tablet_id, Status RestoreTabletAction::_create_hard_link_recursive(const std::string& src, const std::string& dst) { std::vector files; - RETURN_IF_ERROR(FileUtils::scan_dir(src, &files)); + RETURN_IF_ERROR(FileUtils::list_files(Env::Default(), src, &files)); for (auto& file : files) { std::string from = src + "/" + file; std::string to = dst + "/" + file; diff --git a/be/src/http/download_action.cpp b/be/src/http/download_action.cpp index 1d4db19573..4a2afad275 100644 --- a/be/src/http/download_action.cpp +++ b/be/src/http/download_action.cpp @@ -29,6 +29,7 @@ #include #include "agent/cgroups_mgr.h" +#include "env/env.h" #include "http/http_channel.h" #include "http/http_headers.h" #include "http/http_request.h" @@ -138,7 +139,7 @@ void DownloadAction::handle(HttpRequest *req) { void DownloadAction::do_dir_response( const std::string& dir_path, HttpRequest *req) { std::vector files; - Status status = FileUtils::scan_dir(dir_path, &files); + Status status = FileUtils::list_files(Env::Default(), dir_path, &files); if (!status.ok()) { LOG(WARNING) << "Failed to scan dir. dir=" << dir_path; HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR); diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index dba6d76f5d..acd8da4adc 100755 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -37,6 +37,7 @@ #include #include +#include "env/env.h" #include "olap/file_helper.h" #include "olap/olap_define.h" #include "olap/olap_snapshot_converter.h" @@ -472,7 +473,7 @@ void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector* // path: /root_path/trash/time_label/tablet_id/schema_hash std::string trash_path = _path + TRASH_PREFIX; std::vector sub_dirs; - FileUtils::scan_dir(trash_path, &sub_dirs); + FileUtils::list_files(Env::Default(), trash_path, &sub_dirs); for (auto& sub_dir : sub_dirs) { // sub dir is time_label std::string sub_path = trash_path + "/" + sub_dir; diff --git a/be/src/runtime/load_path_mgr.cpp b/be/src/runtime/load_path_mgr.cpp index b992c92813..a794ae792d 100644 --- a/be/src/runtime/load_path_mgr.cpp +++ b/be/src/runtime/load_path_mgr.cpp @@ -24,6 +24,7 @@ #include +#include "env/env.h" #include "olap/olap_define.h" #include "olap/storage_engine.h" #include "util/file_utils.h" @@ -168,9 +169,12 @@ void LoadPathMgr::process_path(time_t now, const std::string& path, int64_t rese } void LoadPathMgr::clean_one_path(const std::string& path) { + Env* env = Env::Default(); + std::vector dbs; - Status status = FileUtils::scan_dir(path, &dbs); - if (!status.ok()) { + Status status = FileUtils::list_files(env, path, &dbs); + // path may not exist + if (!status.ok() && !status.is_not_found()) { LOG(WARNING) << "scan one path to delete directory failed. path=" << path; return; } @@ -179,7 +183,7 @@ void LoadPathMgr::clean_one_path(const std::string& path) { for (auto& db : dbs) { std::string db_dir = path + "/" + db; std::vector sub_dirs; - status = FileUtils::scan_dir(db_dir, &sub_dirs); + status = FileUtils::list_files(env, db_dir, &sub_dirs); if (!status.ok()) { LOG(WARNING) << "scan db of trash dir failed, continue. dir=" << db_dir; continue; @@ -192,7 +196,7 @@ void LoadPathMgr::clean_one_path(const std::string& path) { // sub_dir starts with SHARD_PREFIX // process shard sub dir std::vector labels; - Status status = FileUtils::scan_dir(sub_path, &labels); + Status status = FileUtils::list_files(env, sub_path, &labels); if (!status.ok()) { LOG(WARNING) << "scan one path to delete directory failed. path=" << sub_path; continue; @@ -217,9 +221,11 @@ void LoadPathMgr::clean() { } void LoadPathMgr::clean_error_log() { + Env* env = Env::Default(); + time_t now = time(nullptr); std::vector sub_dirs; - Status status = FileUtils::scan_dir(_error_log_dir, &sub_dirs); + Status status = FileUtils::list_files(env, _error_log_dir, &sub_dirs); if (!status.ok()) { LOG(WARNING) << "scan error_log dir failed. dir=" << _error_log_dir; return; @@ -232,7 +238,7 @@ void LoadPathMgr::clean_error_log() { // sub_dir starts with SHARD_PREFIX // process shard sub dir std::vector error_log_files; - Status status = FileUtils::scan_dir(sub_path, &error_log_files); + Status status = FileUtils::list_files(env, sub_path, &error_log_files); if (!status.ok()) { LOG(WARNING) << "scan one path to delete directory failed. path=" << sub_path; continue; diff --git a/be/src/runtime/small_file_mgr.cpp b/be/src/runtime/small_file_mgr.cpp index ced27a3477..7b6c80fbcb 100644 --- a/be/src/runtime/small_file_mgr.cpp +++ b/be/src/runtime/small_file_mgr.cpp @@ -21,17 +21,18 @@ #include #include +#include // boost::split +#include // boost::algorithm::starts_with +#include // boost::is_any_of + #include "common/status.h" +#include "env/env.h" +#include "gen_cpp/HeartbeatService.h" #include "http/http_client.h" #include "runtime/exec_env.h" #include "util/file_utils.h" #include "util/md5.h" -#include // boost::split -#include // boost::algorithm::starts_with -#include // boost::is_any_of - -#include "gen_cpp/HeartbeatService.h" namespace doris { @@ -53,15 +54,18 @@ Status SmallFileMgr::init() { Status SmallFileMgr::_load_local_files() { RETURN_IF_ERROR(FileUtils::create_dir(_local_path)); - auto scan_cb = [this] (const std::string& dir, const std::string& file) { - auto st = _load_single_file(dir, file); + auto scan_cb = [this] (const char* file) { + if (is_dot_or_dotdot(file)) { + return true; + } + auto st = _load_single_file(_local_path, file); if (!st.ok()) { LOG(WARNING) << "load small file failed: " << st.get_error_msg(); } return true; }; - RETURN_IF_ERROR(FileUtils::scan_dir(_local_path, scan_cb)); + RETURN_IF_ERROR(Env::Default()->iterate_dir(_local_path, scan_cb)); return Status::OK(); } diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index 8f560e74bf..cf2fc0b49b 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -26,6 +26,7 @@ #include "gen_cpp/HeartbeatService_types.h" #include "common/logging.h" +#include "env/env.h" #include "exec/broker_reader.h" #include "exec/broker_writer.h" #include "olap/file_helper.h" @@ -771,7 +772,7 @@ Status SnapshotLoader::_get_existing_files_from_local( const std::string& local_path, std::vector* local_files) { - Status status = FileUtils::scan_dir(local_path, local_files); + Status status = FileUtils::list_files(Env::Default(), local_path, local_files); if (!status.ok()) { std::stringstream ss; ss << "failed to list files in local path: " << local_path << ", msg: " diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index 26b6ebdae5..9e15435319 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -24,6 +24,7 @@ #include // boost::algorithm::ends_with #include // boost::is_any_of +#include "env/env.h" #include "http/http_client.h" #include "util/dynamic_util.h" #include "util/file_utils.h" @@ -155,17 +156,21 @@ Status UserFunctionCache::_load_cached_lib() { // create library directory if not exist RETURN_IF_ERROR(FileUtils::create_dir(_lib_dir)); - auto scan_cb = [this] (const std::string& dir, const std::string& file) { - auto st = _load_entry_from_lib(dir, file); - if (!st.ok()) { - LOG(WARNING) << "load a library failed, dir=" << dir << ", file=" << file; - } - return true; - }; for (int i = 0; i < kLibShardNum; ++i) { std::string sub_dir = _lib_dir + "/" + std::to_string(i); RETURN_IF_ERROR(FileUtils::create_dir(sub_dir)); - RETURN_IF_ERROR(FileUtils::scan_dir(sub_dir, scan_cb)); + + auto scan_cb = [this, &sub_dir] (const char* file) { + if (is_dot_or_dotdot(file)) { + return true; + } + auto st = _load_entry_from_lib(sub_dir, file); + if (!st.ok()) { + LOG(WARNING) << "load a library failed, dir=" << sub_dir << ", file=" << file; + } + return true; + }; + RETURN_IF_ERROR(Env::Default()->iterate_dir(sub_dir, scan_cb)); } return Status::OK(); } diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index 8e71f42c36..a4c08ea52d 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -20,6 +20,8 @@ #include "util/doris_metrics.h" +#include "env/env.h" + #include "util/debug_util.h" #include "util/file_utils.h" #include "util/system_metrics.h" @@ -303,7 +305,7 @@ void DorisMetrics::_update_process_thread_num() { ss << "/proc/" << pid << "/task/"; int64_t count = 0; - Status st = FileUtils::scan_dir(ss.str(), nullptr, &count); + Status st = FileUtils::get_children_count(Env::Default(), ss.str(), &count); if (!st.ok()) { LOG(WARNING) << "failed to count thread num from: " << ss.str(); process_thread_num.set_value(0); @@ -321,7 +323,7 @@ void DorisMetrics::_update_process_fd_num() { std::stringstream ss; ss << "/proc/" << pid << "/fd/"; int64_t count = 0; - Status st = FileUtils::scan_dir(ss.str(), nullptr, &count); + Status st = FileUtils::get_children_count(Env::Default(), ss.str(), &count); if (!st.ok()) { LOG(WARNING) << "failed to count fd from: " << ss.str(); process_fd_num_used.set_value(0); diff --git a/be/src/util/file_utils.cpp b/be/src/util/file_utils.cpp index 8073ee39e1..7caa90e27e 100644 --- a/be/src/util/file_utils.cpp +++ b/be/src/util/file_utils.cpp @@ -33,6 +33,7 @@ #include +#include "env/env.h" #include "olap/file_helper.h" #include "util/defer_op.h" @@ -81,70 +82,25 @@ Status FileUtils::remove_all(const std::string& file_path) { return Status::OK(); } -Status FileUtils::scan_dir( - const std::string& dir_path, std::vector* files, - int64_t* file_count) { - - DIR* dir = opendir(dir_path.c_str()); - if (dir == nullptr) { - char buf[64]; - std::stringstream ss; - ss << "opendir(" << dir_path << ") failed, because: " << strerror_r(errno, buf, 64); - return Status::InternalError(ss.str()); - } - DeferOp close_dir(std::bind(&closedir, dir)); - - int64_t count = 0; - while (true) { - auto result = readdir(dir); - if (result == nullptr) { - break; +Status FileUtils::list_files(Env* env, const std::string& dir, + std::vector* files) { + auto cb = [files](const char* name) -> bool { + if (!is_dot_or_dotdot(name)) { + files->push_back(name); } - std::string file_name = result->d_name; - if (file_name == "." || file_name == "..") { - continue; - } - - if (files != nullptr) { - files->emplace_back(std::move(file_name)); - } - count++; - } - - if (file_count != nullptr) { - *file_count = count; - } - - return Status::OK(); + return true; + }; + return env->iterate_dir(dir, cb); } -Status FileUtils::scan_dir( - const std::string& dir_path, - const std::function& callback) { - auto dir_closer = [] (DIR* dir) { closedir(dir); }; - std::unique_ptr dir(opendir(dir_path.c_str()), dir_closer); - if (dir == nullptr) { - char buf[64]; - LOG(WARNING) << "fail to open dir, dir=" << dir_path << ", errmsg=" << strerror_r(errno, buf, 64); - return Status::InternalError("fail to opendir"); - } - - while (true) { - auto result = readdir(dir.get()); - if (result == nullptr) { - break; +Status FileUtils::get_children_count(Env* env, const std::string& dir, int64_t* count) { + auto cb = [count](const char* name) -> bool { + if (!is_dot_or_dotdot(name)) { + *count += 1; } - std::string file_name = result->d_name; - if (file_name == "." || file_name == "..") { - continue; - } - auto is_continue = callback(dir_path, file_name); - if (!is_continue) { - break; - } - } - - return Status::OK(); + return true; + }; + return env->iterate_dir(dir, cb); } bool FileUtils::is_dir(const std::string& path) { diff --git a/be/src/util/file_utils.h b/be/src/util/file_utils.h index dd0343f7db..80438417b4 100644 --- a/be/src/util/file_utils.h +++ b/be/src/util/file_utils.h @@ -25,8 +25,16 @@ namespace doris { +class Env; + +// Return true if file is '.' or '..' +inline bool is_dot_or_dotdot(const char* name) { + return name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); +} + class FileUtils { public: + // Create directory of dir_path, // This function will create directory recursively, // if dir's parent directory doesn't exist @@ -38,16 +46,18 @@ public: // Delete file recursively. static Status remove_all(const std::string& dir_path); - // Scan dir path and return all files in this path without '.' and '..' - // Item in files is the filename in 'dir_path', which is not absolute path - // if files == nullptr, no file names will be returned. - // if file_count != nullptr, it will save the number of files. - static Status scan_dir( - const std::string& dir_path, std::vector* files, - int64_t* file_count = nullptr); - static Status scan_dir( - const std::string& dir_path, - const std::function& callback); + // List all files in the specified directory without '.' and '..'. + // If you want retreive all files, you can use Env::iterate_dir. + // All valid files will be stored in given *files. + static Status list_files( + Env* env, + const std::string& dir, + std::vector* files); + + // Get the number of children belong to the specified directory, this + // funciton also exclude '.' and '..'. + // Return OK with *count is set to the count, if execute successful. + static Status get_children_count(Env* env, const std::string& dir, int64_t* count); // If the file_path is not exist, or is not a dir, return false. static bool is_dir(const std::string& file_path); diff --git a/be/test/env/env_posix_test.cpp b/be/test/env/env_posix_test.cpp index eef4e1b16e..d21eb684d2 100644 --- a/be/test/env/env_posix_test.cpp +++ b/be/test/env/env_posix_test.cpp @@ -19,7 +19,10 @@ #include +#include + #include "common/logging.h" +#include "util/file_utils.h" namespace doris { @@ -201,6 +204,42 @@ TEST_F(EnvPosixTest, random_rw) { } } +TEST_F(EnvPosixTest, iterate_dir) { + std::string dir_path = "./ut_dir/env_posix/iterate_dir"; + FileUtils::remove_all(dir_path); + auto st = Env::Default()->create_dir_if_missing(dir_path); + ASSERT_TRUE(st.ok()); + + st = Env::Default()->create_dir_if_missing(dir_path + "/abc"); + ASSERT_TRUE(st.ok()); + + st = Env::Default()->create_dir_if_missing(dir_path + "/123"); + ASSERT_TRUE(st.ok()); + + { + std::vector children; + st = Env::Default()->get_children(dir_path, &children); + ASSERT_EQ(4, children.size()); + std::sort(children.begin(), children.end()); + + ASSERT_STREQ(".", children[0].c_str()); + ASSERT_STREQ("..", children[1].c_str()); + ASSERT_STREQ("123", children[2].c_str()); + ASSERT_STREQ("abc", children[3].c_str()); + } + { + std::vector children; + st = FileUtils::list_files(Env::Default(), dir_path, &children); + ASSERT_EQ(2, children.size()); + std::sort(children.begin(), children.end()); + + ASSERT_STREQ("123", children[0].c_str()); + ASSERT_STREQ("abc", children[1].c_str()); + } + + FileUtils::remove_all(dir_path); +} + } int main(int argc, char* argv[]) {