diff --git a/be/src/http/download_action.cpp b/be/src/http/download_action.cpp index e4ac777c41..47a299dbe1 100644 --- a/be/src/http/download_action.cpp +++ b/be/src/http/download_action.cpp @@ -25,9 +25,6 @@ #include #include -#include "boost/lexical_cast.hpp" -#include - #include "agent/cgroups_mgr.h" #include "env/env.h" #include "http/http_channel.h" @@ -35,12 +32,10 @@ #include "http/http_request.h" #include "http/http_response.h" #include "http/http_status.h" -#include "util/defer_op.h" +#include "runtime/exec_env.h" #include "util/file_utils.h" #include "util/filesystem_util.h" -#include "runtime/exec_env.h" - -using boost::filesystem::canonical; +#include "util/path_util.h" namespace doris { @@ -190,8 +185,7 @@ void DownloadAction::do_file_response(const std::string& file_path, HttpRequest if (req->method() == HttpMethod::HEAD) { close(fd); - req->add_output_header(HttpHeaders::CONTENT_LENGTH, - boost::lexical_cast(file_size).c_str()); + req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(file_size).c_str()); HttpChannel::send_reply(req); return; } @@ -199,26 +193,9 @@ void DownloadAction::do_file_response(const std::string& file_path, HttpRequest HttpChannel::send_file(req, fd, 0, file_size); } -// If 'file_name' contains a dot but does not consist solely of one or to two dots, -// returns the substring of file_name starting at the rightmost dot and ending at the path's end. -// Otherwise, returns an empty string -std::string DownloadAction::get_file_extension(const std::string& file_name) { - // Get file Extention - std::string file_extension; - for (int i = file_name.size() - 1; i > 0; --i) { - if (file_name[i] == '/') { - break; - } - if (file_name[i] == '.' && file_name[i-1] != '.') { - return std::string(file_name, i); - } - } - return file_extension; -} - // Do a simple decision, only deal a few type std::string DownloadAction::get_content_type(const std::string& file_name) { - std::string file_ext = get_file_extension(file_name); + std::string file_ext = path_util::file_extension(file_name); LOG(INFO) << "file_name: " << file_name << "; file extension: [" << file_ext << "]"; if (file_ext == std::string(".html") || file_ext == std::string(".htm")) { @@ -232,7 +209,7 @@ std::string DownloadAction::get_content_type(const std::string& file_name) { } else { return "text/plain; charset=utf-8"; } - return std::string(); + return ""; } Status DownloadAction::check_token(HttpRequest *req) { @@ -250,12 +227,12 @@ Status DownloadAction::check_token(HttpRequest *req) { Status DownloadAction::check_path_is_allowed(const std::string& file_path) { DCHECK_EQ(_download_type, NORMAL); - + std::string canonical_file_path; RETURN_WITH_WARN_IF_ERROR(FileUtils::canonicalize(file_path, &canonical_file_path), Status::InternalError("file path is invalid: " + file_path), "file path is invalid: " + file_path); - + for (auto& allow_path : _allow_paths) { if (FileSystemUtil::contain_path(allow_path, canonical_file_path)) { return Status::OK(); @@ -272,7 +249,7 @@ Status DownloadAction::check_log_path_is_allowed(const std::string& file_path) { RETURN_WITH_WARN_IF_ERROR(FileUtils::canonicalize(file_path, &canonical_file_path), Status::InternalError("file path is invalid: " + file_path), "file path is invalid: " + file_path); - + if (FileSystemUtil::contain_path(_error_log_root_dir, canonical_file_path)) { return Status::OK(); } diff --git a/be/src/http/download_action.h b/be/src/http/download_action.h index 737181d8ac..a82f19559c 100644 --- a/be/src/http/download_action.h +++ b/be/src/http/download_action.h @@ -67,8 +67,6 @@ private: int64_t get_file_size(FILE* fp); - std::string get_file_extension(const std::string& file_name); - std::string get_content_type(const std::string& file_name); ExecEnv* _exec_env; diff --git a/be/src/olap/options.cpp b/be/src/olap/options.cpp index 8717ae30d1..938243f7da 100644 --- a/be/src/olap/options.cpp +++ b/be/src/olap/options.cpp @@ -20,129 +20,130 @@ #include #include "common/logging.h" -#include "olap/utils.h" - +#include "common/status.h" +#include "env/env.h" #include "gutil/strings/split.h" +#include "gutil/strings/substitute.h" +#include "olap/utils.h" +#include "util/path_util.h" namespace doris { +using std::string; +using std::vector; + static std::string CAPACITY_UC = "CAPACITY"; static std::string MEDIUM_UC = "MEDIUM"; static std::string SSD_UC = "SSD"; static std::string HDD_UC = "HDD"; // TODO: should be a general util method -std::string to_upper(const std::string& str) { +static std::string to_upper(const std::string& str) { std::string out = str; std::transform(out.begin(), out.end(), out.begin(), ::toupper); return out; } -// compatible with old multi path configuration: -// /path1,1024;/path2,2048 -OLAPStatus parse_root_path(const std::string& root_path, StorePath* path) { - try { - std::vector tmp_vec = strings::Split(root_path, ",", strings::SkipWhitespace()); +// Currently, both of two following formats are supported(see be.conf) +// format 1: /home/disk1/palo.HDD,50 +// format 2: /home/disk1/palo,medium:ssd,capacity:50 +OLAPStatus parse_root_path(const string& root_path, StorePath* path) { + vector tmp_vec = strings::Split(root_path, ",", strings::SkipWhitespace()); - // parse root path name - StripWhiteSpace(&tmp_vec[0]); - tmp_vec[0].erase(tmp_vec[0].find_last_not_of("/") + 1); - if (tmp_vec[0].empty() || tmp_vec[0][0] != '/') { - LOG(WARNING) << "invalid store path. path=" << tmp_vec[0]; + // parse root path name + StripWhiteSpace(&tmp_vec[0]); + tmp_vec[0].erase(tmp_vec[0].find_last_not_of("/") + 1); + if (tmp_vec[0].empty() || tmp_vec[0][0] != '/') { + LOG(WARNING) << "invalid store path. path=" << tmp_vec[0]; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + + string canonicalized_path; + Status status = Env::Default()->canonicalize(tmp_vec[0], &canonicalized_path); + if (!status.ok()) { + LOG(WARNING) << "path can not be canonicalized. may be not exist. path=" << tmp_vec[0]; + return OLAP_ERR_INPUT_PARAMETER_ERROR; + } + path->path = tmp_vec[0]; + + // parse root path capacity and storage medium + string capacity_str; + string medium_str = HDD_UC; + + string extension = path_util::file_extension(canonicalized_path); + if (!extension.empty()) { + medium_str = to_upper(extension.substr(1)); + } + + for (int i = 1; i < tmp_vec.size(); i++) { + // : or + string property; + string value; + std::pair pair = strings::Split( + tmp_vec[i], strings::delimiter::Limit(":", 1)); + if (pair.second.empty()) { + // format_1: only supports setting capacity + property = CAPACITY_UC; + value = tmp_vec[i]; + } else { + // format_2 + property = to_upper(pair.first); + value = pair.second; + } + + StripWhiteSpace(&property); + StripWhiteSpace(&value); + if (property == CAPACITY_UC) { + capacity_str = value; + } else if (property == MEDIUM_UC) { + // property 'medium' has a higher priority than the extension of + // path, so it can override medium_str + medium_str = to_upper(value); + } else { + LOG(WARNING) << "invalid property of store path, " << tmp_vec[i]; return OLAP_ERR_INPUT_PARAMETER_ERROR; } - path->path = tmp_vec[0]; + } - // parse root path capacity and storage medium - std::string capacity_str; - std::string medium_str; - - boost::filesystem::path boost_path = tmp_vec[0]; - std::string extension = boost::filesystem::canonical(boost_path).extension().string(); - if (!extension.empty()) { - medium_str = to_upper(extension.substr(1)); + path->capacity_bytes = -1; + if (!capacity_str.empty()) { + if (!valid_signed_number(capacity_str) + || strtol(capacity_str.c_str(), NULL, 10) < 0) { + LOG(WARNING) << "invalid capacity of store path, capacity=" << capacity_str; + return OLAP_ERR_INPUT_PARAMETER_ERROR; } + path->capacity_bytes = strtol(capacity_str.c_str(), NULL, 10) * GB_EXCHANGE_BYTE; + } - for (int i = 1; i < tmp_vec.size(); i++) { - // : or - std::string property; - std::string value; - std::pair pair = strings::Split( - tmp_vec[i], strings::delimiter::Limit(":", 1)); - if (!pair.second.empty()) { - property = to_upper(pair.first); - value = pair.second; - } else { - // only supports setting capacity - property = CAPACITY_UC; - value = tmp_vec[i]; - } - - StripWhiteSpace(&property); - StripWhiteSpace(&value); - if (property == CAPACITY_UC) { - capacity_str = value; - } else if (property == MEDIUM_UC) { - // property 'medium' has a higher priority than the extension of - // path, so it can override medium_str - medium_str = to_upper(value); - } else { - LOG(WARNING) << "invalid property of store path, " << property; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } + path->storage_medium = TStorageMedium::HDD; + if (!medium_str.empty()) { + if (medium_str == SSD_UC) { + path->storage_medium = TStorageMedium::SSD; + } else if (medium_str == HDD_UC) { + path->storage_medium = TStorageMedium::HDD; + } else { + LOG(WARNING) << "invalid storage medium. medium=" << medium_str; + return OLAP_ERR_INPUT_PARAMETER_ERROR; } + } - path->capacity_bytes = -1; - if (!capacity_str.empty()) { - if (!valid_signed_number(capacity_str) - || strtol(capacity_str.c_str(), NULL, 10) < 0) { - LOG(WARNING) << "invalid capacity of store path, capacity=" - << capacity_str; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - path->capacity_bytes = - strtol(capacity_str.c_str(), NULL, 10) * GB_EXCHANGE_BYTE; - } + return OLAP_SUCCESS; +} - path->storage_medium = TStorageMedium::HDD; - if (!medium_str.empty()) { - if (medium_str == SSD_UC) { - path->storage_medium = TStorageMedium::SSD; - } else if (medium_str == HDD_UC) { - path->storage_medium = TStorageMedium::HDD; - } else { - LOG(WARNING) << "invalid storage medium. medium=" << medium_str; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - } - } catch (...) { - LOG(WARNING) << "invalid store path. path=" << root_path; +OLAPStatus parse_conf_store_paths(const string& config_path, vector* paths) { + vector path_vec = strings::Split(config_path, ";", strings::SkipWhitespace()); + for (auto& item : path_vec) { + StorePath path; + RETURN_NOT_OK_LOG(parse_root_path(item, &path), + strings::Substitute("fail to parse store path. path=$0", item)); + paths->emplace_back(std::move(path)); + } + if (paths->empty()) { + LOG(WARNING) << "fail to parse storage_root_path config. value=[" << config_path << "]"; return OLAP_ERR_INPUT_PARAMETER_ERROR; } return OLAP_SUCCESS; } -OLAPStatus parse_conf_store_paths(const std::string& config_path, - std::vector* paths) { - try { - std::vector path_vec = strings::Split( - config_path, ";", strings::SkipWhitespace()); - for (auto& item : path_vec) { - StorePath path; - auto res = parse_root_path(item, &path); - if (res != OLAP_SUCCESS) { - LOG(WARNING) << "get config store path failed. path=" - << config_path; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - paths->emplace_back(std::move(path)); - } - } catch (...) { - LOG(WARNING) << "get config store path failed. path=" << config_path; - return OLAP_ERR_INPUT_PARAMETER_ERROR; - } - - return OLAP_SUCCESS; -} -} +} // end namespace doris diff --git a/be/src/olap/options.h b/be/src/olap/options.h index d8fc939332..9e478c81ea 100644 --- a/be/src/olap/options.h +++ b/be/src/olap/options.h @@ -50,6 +50,7 @@ OLAPStatus parse_conf_store_paths(const std::string& config_path, struct EngineOptions { // list paths that tablet will be put into. std::vector store_paths; + // BE's UUID. It will be reset every time BE restarts. UniqueId backend_uid{0, 0}; }; } diff --git a/be/src/util/path_util.cpp b/be/src/util/path_util.cpp index e9eb482fd9..33a8535809 100644 --- a/be/src/util/path_util.cpp +++ b/be/src/util/path_util.cpp @@ -80,5 +80,15 @@ string base_name(const string& path) { return basename(path_copy.get()); } +string file_extension(const string& path) { + string file_name = base_name(path); + if (file_name == "." || file_name == "..") { + return ""; + } + + string::size_type pos = file_name.rfind("."); + return pos == string::npos ? "" : file_name.substr(pos); +} + } // namespace path_util } // namespace doris diff --git a/be/src/util/path_util.h b/be/src/util/path_util.h index b1e5f1a0a3..a3816ef862 100644 --- a/be/src/util/path_util.h +++ b/be/src/util/path_util.h @@ -26,6 +26,7 @@ namespace path_util { // NOTE: The methods here are only related to path processing, do not involve // any file and IO operations. + extern const std::string kTmpInfix; // Join two path segments with the appropriate path separator, if necessary. @@ -54,5 +55,14 @@ std::string dir_name(const std::string& path); // This is like basename(3) but for C++ strings. std::string base_name(const std::string& path); +// It is used to replace boost::filesystem::path::extension(). +// If the filename contains a dot but does not consist solely of one or to two dots, +// returns the substring of file_name starting at the rightmost dot and ending at +// the path's end. Otherwise, returns an empty string. +// The dot is included in the return value so that it is possible to distinguish +// between no extension and an empty extension. +// NOTE: path can be either one file's full path or only file name +std::string file_extension(const std::string& path); + } // namespace path_util } // namespace doris diff --git a/be/test/util/path_util_test.cpp b/be/test/util/path_util_test.cpp index 7cf10481cb..62c1839d2c 100644 --- a/be/test/util/path_util_test.cpp +++ b/be/test/util/path_util_test.cpp @@ -80,6 +80,32 @@ TEST(TestPathUtil, SplitPathTest) { ASSERT_EQ(Vec(), path_util::split_path("")); } +TEST(TestPathUtil, file_extension_test) { + ASSERT_EQ("", path_util::file_extension("")); + ASSERT_EQ("", path_util::file_extension(".")); + ASSERT_EQ("", path_util::file_extension("..")); + ASSERT_EQ("", path_util::file_extension("/")); + ASSERT_EQ("", path_util::file_extension("//")); + ASSERT_EQ("", path_util::file_extension("///")); + ASSERT_EQ("", path_util::file_extension("a")); + ASSERT_EQ("", path_util::file_extension("ab")); + ASSERT_EQ("", path_util::file_extension("ab/")); + ASSERT_EQ("", path_util::file_extension("ab/cd")); + ASSERT_EQ("", path_util::file_extension("/ab")); + ASSERT_EQ("", path_util::file_extension("/ab/")); + ASSERT_EQ("", path_util::file_extension("///ab///")); + ASSERT_EQ("", path_util::file_extension("/ab/cd")); + ASSERT_EQ("", path_util::file_extension("../ab/cd")); + + ASSERT_EQ(".a", path_util::file_extension(".a")); + ASSERT_EQ("", path_util::file_extension("a.b/c")); + ASSERT_EQ(".d", path_util::file_extension("a.b/c.d")); + ASSERT_EQ(".c", path_util::file_extension("a/b.c")); + ASSERT_EQ(".", path_util::file_extension("a/b.")); + ASSERT_EQ(".c", path_util::file_extension("a.b.c")); + ASSERT_EQ(".", path_util::file_extension("a.b.c.")); +} + } // namespace doris int main(int argc, char* argv[]) {