diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index dccddede84..2159056001 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -86,6 +86,7 @@ set(UTIL_FILES
   zip_util.cpp
   utf8_check.cpp
   cgroup_util.cpp
+  path_util.cpp
 )
 
 if (WITH_MYSQL)
diff --git a/be/src/util/path_util.cpp b/be/src/util/path_util.cpp
new file mode 100644
index 0000000000..e9eb482fd9
--- /dev/null
+++ b/be/src/util/path_util.cpp
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/path_util.h"
+
+#include <string>
+#include <vector>
+// Use the POSIX version of dirname(3). See `man 3 dirname`
+#include <libgen.h>
+
+#include "common/logging.h"
+#include "gutil/strings/split.h"
+#include "gutil/strings/stringpiece.h"
+#include "gutil/strings/strip.h"
+
+using std::string;
+using std::vector;
+using strings::SkipEmpty;
+using strings::Split;
+
+namespace doris {
+namespace path_util {
+
+const string kTmpInfix = ".doristmp";
+
+// Joins `a` and `b` with a '/' between them. If either segment is empty the
+// other one is returned unchanged. A trailing '/' on `a` and a leading '/' on
+// `b` are stripped first, so "a/" + "/b" yields "a/b".
+string join_path_segments(const string& a, const string& b) {
+    if (a.empty()) {
+        return b;
+    } else if (b.empty()) {
+        return a;
+    } else {
+        return StripSuffixString(a, "/") + "/" + StripPrefixString(b, "/");
+    }
+}
+
+// Returns join_path_segments(path, s) for every path in `v`, in order.
+vector<string> join_path_segments_v(const vector<string>& v, const string& s) {
+    vector<string> out;
+    out.reserve(v.size());
+    for (const string& path : v) {
+        out.emplace_back(join_path_segments(path, s));
+    }
+    return out;
+}
+
+// Splits `path` on '/', skipping empty pieces. A path that starts with '/'
+// yields "/" as its first segment; an empty path yields an empty vector.
+vector<string> split_path(const string& path) {
+    if (path.empty()) {
+        return {};
+    }
+    vector<string> segments;
+    if (path[0] == '/') {
+        segments.emplace_back("/");
+    }
+    vector<StringPiece> pieces = Split(path, "/", SkipEmpty());
+    for (const StringPiece& piece : pieces) {
+        segments.emplace_back(piece.data(), piece.size());
+    }
+    return segments;
+}
+
+string dir_name(const string& path) {
+    // dirname(3) may modify its argument, so hand it a private, writable,
+    // NUL-terminated copy. The result is copied into the returned string
+    // before the buffer is destroyed.
+    string path_copy(path);
+    return dirname(&path_copy[0]);
+}
+
+string base_name(const string& path) {
+    // POSIX basename(3) may modify its argument, so work on a private copy.
+    string path_copy(path);
+    return basename(&path_copy[0]);
+}
+
+} // namespace path_util
+} // namespace doris
diff --git a/be/src/util/path_util.h b/be/src/util/path_util.h
new file mode 100644
index 0000000000..b1e5f1a0a3
--- /dev/null
+++ b/be/src/util/path_util.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Utility methods for dealing with file paths.
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace doris {
+namespace path_util {
+
+// NOTE: The methods here only manipulate path strings; they do not perform
+// any file or IO operations.
+extern const std::string kTmpInfix;
+
+// Join two path segments with the appropriate path separator, if necessary.
+std::string join_path_segments(const std::string& a, const std::string& b);
+
+// Join each path segment in a list with a common suffix segment.
+std::vector<std::string> join_path_segments_v(const std::vector<std::string>& v,
+                                              const std::string& s);
+
+// Split a path into segments with the appropriate path separator.
+// A path starting with "/" yields "/" as its first segment and empty
+// segments are skipped, e.g. "/a//b/" -> {"/", "a", "b"} and "" -> {}.
+std::vector<std::string> split_path(const std::string& path);
+
+// Return the enclosing directory of path.
+// This is like dirname(3) but for C++ strings.
+// The following list of examples shows the strings returned by dirname() and basename():
+//   path         dirname    basename
+//   "/usr/lib"    "/usr"    "lib"
+//   "/usr/"       "/"       "usr"
+//   "usr"         "."       "usr"
+//   "/"           "/"       "/"
+//   "."           "."       "."
+//   ".."          "."       ".."
+std::string dir_name(const std::string& path);
+
+// Return the terminal component of a path.
+// This is like basename(3) but for C++ strings.
+std::string base_name(const std::string& path);
+
+} // namespace path_util
+} // namespace doris
diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt
index bf2f38be80..3154de5480 100644
--- a/be/test/util/CMakeLists.txt
+++ b/be/test/util/CMakeLists.txt
@@ -56,3 +56,5 @@ ADD_BE_TEST(radix_sort_test)
 ADD_BE_TEST(zip_util_test)
 ADD_BE_TEST(utf8_check_test)
 ADD_BE_TEST(cgroup_util_test)
+ADD_BE_TEST(path_util_test)
+
diff --git a/be/test/util/path_util_test.cpp b/be/test/util/path_util_test.cpp
new file mode 100644
index 0000000000..7cf10481cb
--- /dev/null
+++ b/be/test/util/path_util_test.cpp
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/path_util.h"
+
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "common/config.h"
+#include "util/logging.h"
+
+using std::string;
+using std::vector;
+
+namespace doris {
+
+TEST(TestPathUtil, JoinPathSegments) {
+    ASSERT_EQ("a", path_util::join_path_segments("a", ""));
+    ASSERT_EQ("b", path_util::join_path_segments("", "b"));
+    ASSERT_EQ("a/b", path_util::join_path_segments("a", "b"));
+    ASSERT_EQ("a/b", path_util::join_path_segments("a/", "b"));
+    ASSERT_EQ("a/b", path_util::join_path_segments("a", "/b"));
+    ASSERT_EQ("a/b", path_util::join_path_segments("a/", "/b"));
+}
+
+TEST(TestPathUtil, JoinPathSegmentsV) {
+    using Vec = vector<string>;
+    ASSERT_EQ(Vec(), path_util::join_path_segments_v(Vec(), "c"));
+    ASSERT_EQ(Vec({"a/c", "b/c"}), path_util::join_path_segments_v(Vec({"a", "b/"}), "c"));
+}
+
+TEST(TestPathUtil, BaseNameTest) {
+    ASSERT_EQ(".", path_util::base_name(""));
+    ASSERT_EQ(".", path_util::base_name("."));
+    ASSERT_EQ("..", path_util::base_name(".."));
+    ASSERT_EQ("/", path_util::base_name("/"));
+    ASSERT_EQ("/", path_util::base_name("//"));
+    ASSERT_EQ("a", path_util::base_name("a"));
+    ASSERT_EQ("ab", path_util::base_name("ab"));
+    ASSERT_EQ("ab", path_util::base_name("ab/"));
+    ASSERT_EQ("cd", path_util::base_name("ab/cd"));
+    ASSERT_EQ("ab", path_util::base_name("/ab"));
+    ASSERT_EQ("ab", path_util::base_name("/ab///"));
+    ASSERT_EQ("cd", path_util::base_name("/ab/cd"));
+}
+
+TEST(TestPathUtil, DirNameTest) {
+    ASSERT_EQ(".", path_util::dir_name(""));
+    ASSERT_EQ(".", path_util::dir_name("."));
+    ASSERT_EQ(".", path_util::dir_name(".."));
+    ASSERT_EQ("/", path_util::dir_name("/"));
+    ASSERT_EQ("//", path_util::dir_name("//"));
+    ASSERT_EQ(".", path_util::dir_name("a"));
+    ASSERT_EQ(".", path_util::dir_name("ab"));
+    ASSERT_EQ(".", path_util::dir_name("ab/"));
+    ASSERT_EQ("ab", path_util::dir_name("ab/cd"));
+    ASSERT_EQ("/", path_util::dir_name("/ab"));
+    ASSERT_EQ("/", path_util::dir_name("/ab///"));
+    ASSERT_EQ("/ab", path_util::dir_name("/ab/cd"));
+}
+
+TEST(TestPathUtil, SplitPathTest) {
+    using Vec = vector<string>;
+    ASSERT_EQ(Vec({"/"}), path_util::split_path("/"));
+    ASSERT_EQ(Vec({"/", "a", "b"}), path_util::split_path("/a/b"));
+    ASSERT_EQ(Vec({"/", "a", "b"}), path_util::split_path("/a/b/"));
+    ASSERT_EQ(Vec({"/", "a", "b"}), path_util::split_path("/a//b/"));
+    ASSERT_EQ(Vec({"a", "b"}), path_util::split_path("a/b"));
+    ASSERT_EQ(Vec({"."}), path_util::split_path("."));
+    ASSERT_EQ(Vec(), path_util::split_path(""));
+}
+
+} // namespace doris
+
+int main(int argc, char* argv[]) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/run-ut.sh b/run-ut.sh
index bd077242e5..297283d0ea 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -167,6 +167,7 @@ ${DORIS_TEST_BINARY_DIR}/util/frame_of_reference_coding_test
 ${DORIS_TEST_BINARY_DIR}/util/zip_util_test
 ${DORIS_TEST_BINARY_DIR}/util/utf8_check_test
 ${DORIS_TEST_BINARY_DIR}/util/cgroup_util_test
+${DORIS_TEST_BINARY_DIR}/util/path_util_test
 
 # Running common Unittest
 ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test