From c71eefa2acdaf4770055cc7b329019e26b81da49 Mon Sep 17 00:00:00 2001 From: LingBin Date: Sat, 18 Jan 2020 00:05:00 +0800 Subject: [PATCH] Add path util (#2747) Note that the methods in path_util are only related to path processing, and do not involve any file and IO operations The upcoming patch will use these util methods, used to extract operations such as concatenation of directory strings from processing logic. --- be/src/util/CMakeLists.txt | 1 + be/src/util/path_util.cpp | 84 +++++++++++++++++++++++++++++++ be/src/util/path_util.h | 58 ++++++++++++++++++++++ be/test/util/CMakeLists.txt | 2 + be/test/util/path_util_test.cpp | 88 +++++++++++++++++++++++++++++++++ run-ut.sh | 1 + 6 files changed, 234 insertions(+) create mode 100644 be/src/util/path_util.cpp create mode 100644 be/src/util/path_util.h create mode 100644 be/test/util/path_util_test.cpp diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index dccddede84..2159056001 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -86,6 +86,7 @@ set(UTIL_FILES zip_util.cpp utf8_check.cpp cgroup_util.cpp + path_util.cpp ) if (WITH_MYSQL) diff --git a/be/src/util/path_util.cpp b/be/src/util/path_util.cpp new file mode 100644 index 0000000000..e9eb482fd9 --- /dev/null +++ b/be/src/util/path_util.cpp @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
namespace doris {
namespace path_util {

// Infix string ".doristmp".
// NOTE(review): presumably inserted into the names of temporary files; nothing
// in this file uses it, so confirm against the callers.
const std::string kTmpInfix = ".doristmp";

// Joins two path segments with a single '/' between them.
//
// If either segment is empty the other one is returned unchanged. Otherwise at
// most ONE trailing '/' of `a` and at most ONE leading '/' of `b` are dropped
// before concatenation, e.g. ("a/", "/b") -> "a/b" and ("a//", "b") -> "a//b".
std::string join_path_segments(const std::string& a, const std::string& b) {
    if (a.empty()) {
        return b;
    }
    if (b.empty()) {
        return a;
    }

    const std::string::size_type a_len = (a.back() == '/') ? a.size() - 1 : a.size();
    const std::string::size_type b_pos = (b.front() == '/') ? 1 : 0;

    std::string joined;
    joined.reserve(a_len + 1 + (b.size() - b_pos));
    joined.append(a, 0, a_len);
    joined.push_back('/');
    joined.append(b, b_pos, std::string::npos);
    return joined;
}

// Applies join_path_segments(path, s) to every element of `v` and returns the
// results in the same order.
std::vector<std::string> join_path_segments_v(const std::vector<std::string>& v,
                                              const std::string& s) {
    std::vector<std::string> out;
    out.reserve(v.size());
    for (const std::string& path : v) {
        out.emplace_back(join_path_segments(path, s));
    }
    return out;
}

// Splits `path` on '/'.
//
// An empty path yields an empty vector. A leading '/' is reported as its own
// "/" segment; empty pieces produced by repeated or trailing separators are
// skipped, e.g. "/a//b/" -> {"/", "a", "b"}.
std::vector<std::string> split_path(const std::string& path) {
    std::vector<std::string> segments;
    if (path.empty()) {
        return segments;
    }
    if (path.front() == '/') {
        segments.emplace_back("/");
    }

    std::string::size_type begin = 0;
    while (begin < path.size()) {
        std::string::size_type end = path.find('/', begin);
        if (end == std::string::npos) {
            end = path.size();
        }
        if (end > begin) {
            // Non-empty piece between two separators (or up to the end).
            segments.emplace_back(path, begin, end - begin);
        }
        begin = end + 1;
    }
    return segments;
}

// Returns the enclosing directory of `path`, as computed by POSIX dirname(3).
std::string dir_name(const std::string& path) {
    // dirname(3) may modify its argument, so hand it a private NUL-terminated
    // copy. The copy is owned by a std::vector, so there is no malloc()/free()
    // pairing to get wrong and no allocation result to check.
    std::vector<char> path_copy(path.c_str(), path.c_str() + path.size() + 1);
    return dirname(path_copy.data());
}

// Returns the terminal component of `path`, as computed by POSIX basename(3).
std::string base_name(const std::string& path) {
    // basename(3) may modify its argument as well; see dir_name() for why the
    // copy lives in a std::vector.
    std::vector<char> path_copy(path.c_str(), path.c_str() + path.size() + 1);
    return basename(path_copy.data());
}

} // namespace path_util
} // namespace doris
namespace doris {
namespace path_util {

// NOTE: The functions here only manipulate path strings; they do not perform
// any file or IO operations.

// Infix string constant (defined in path_util.cpp).
extern const std::string kTmpInfix;

// Join two path segments with the appropriate path separator, if necessary.
// If either segment is empty the other one is returned as is. Otherwise one
// trailing '/' of `a` and one leading '/' of `b` are dropped, so that
// ("a", "b"), ("a/", "b"), ("a", "/b") and ("a/", "/b") all yield "a/b".
std::string join_path_segments(const std::string& a, const std::string& b);

// Join each path segment in a list with a common suffix segment.
// Element i of the result is join_path_segments(v[i], s).
std::vector<std::string> join_path_segments_v(const std::vector<std::string>& v,
                                              const std::string& s);

// Split a path into segments with the appropriate path separator.
// A leading '/' is returned as its own "/" segment, empty segments are
// skipped, and an empty path yields an empty vector:
//   "/a//b/"  ->  {"/", "a", "b"}
//   "a/b"     ->  {"a", "b"}
std::vector<std::string> split_path(const std::string& path);

// Return the enclosing directory of path.
// This is like dirname(3) but for C++ strings.
// The following list of examples shows the strings returned by dir_name()
// and base_name():
//   path         dir_name    base_name
//   "/usr/lib"   "/usr"      "lib"
//   "/usr/"      "/"         "usr"
//   "usr"        "."         "usr"
//   "/"          "/"         "/"
//   "."          "."         "."
//   ".."         "."         ".."
std::string dir_name(const std::string& path);

// Return the terminal component of a path.
// This is like basename(3) but for C++ strings; see the table above
// dir_name() for examples.
std::string base_name(const std::string& path);

} // namespace path_util
} // namespace doris
+ +#include "util/path_util.h" + +#include +#include + +#include + +#include "common/config.h" +#include "util/logging.h" + +using std::string; +using std::vector; + +namespace doris { + +TEST(TestPathUtil, JoinPathSegments) { + ASSERT_EQ("a", path_util::join_path_segments("a", "")); + ASSERT_EQ("b", path_util::join_path_segments("", "b")); + ASSERT_EQ("a/b", path_util::join_path_segments("a", "b")); + ASSERT_EQ("a/b", path_util::join_path_segments("a/", "b")); + ASSERT_EQ("a/b", path_util::join_path_segments("a", "/b")); + ASSERT_EQ("a/b", path_util::join_path_segments("a/", "/b")); +} + +TEST(TestPathUtil, BaseNameTest) { + ASSERT_EQ(".", path_util::base_name("")); + ASSERT_EQ(".", path_util::base_name(".")); + ASSERT_EQ("..", path_util::base_name("..")); + ASSERT_EQ("/", path_util::base_name("/")); + ASSERT_EQ("/", path_util::base_name("//")); + ASSERT_EQ("a", path_util::base_name("a")); + ASSERT_EQ("ab", path_util::base_name("ab")); + ASSERT_EQ("ab", path_util::base_name("ab/")); + ASSERT_EQ("cd", path_util::base_name("ab/cd")); + ASSERT_EQ("ab", path_util::base_name("/ab")); + ASSERT_EQ("ab", path_util::base_name("/ab///")); + ASSERT_EQ("cd", path_util::base_name("/ab/cd")); +} + +TEST(TestPathUtil, DirNameTest) { + ASSERT_EQ(".", path_util::dir_name("")); + ASSERT_EQ(".", path_util::dir_name(".")); + ASSERT_EQ(".", path_util::dir_name("..")); + ASSERT_EQ("/", path_util::dir_name("/")); + ASSERT_EQ("//", path_util::dir_name("//")); + ASSERT_EQ(".", path_util::dir_name("a")); + ASSERT_EQ(".", path_util::dir_name("ab")); + ASSERT_EQ(".", path_util::dir_name("ab/")); + ASSERT_EQ("ab", path_util::dir_name("ab/cd")); + ASSERT_EQ("/", path_util::dir_name("/ab")); + ASSERT_EQ("/", path_util::dir_name("/ab///")); + ASSERT_EQ("/ab", path_util::dir_name("/ab/cd")); +} + +TEST(TestPathUtil, SplitPathTest) { + using Vec = vector; + ASSERT_EQ(Vec({"/"}), path_util::split_path("/")); + ASSERT_EQ(Vec({"/", "a", "b"}), path_util::split_path("/a/b")); + ASSERT_EQ(Vec({"/", 
"a", "b"}), path_util::split_path("/a/b/")); + ASSERT_EQ(Vec({"/", "a", "b"}), path_util::split_path("/a//b/")); + ASSERT_EQ(Vec({"a", "b"}), path_util::split_path("a/b")); + ASSERT_EQ(Vec({"."}), path_util::split_path(".")); + ASSERT_EQ(Vec(), path_util::split_path("")); +} + +} // namespace doris + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/run-ut.sh b/run-ut.sh index bd077242e5..297283d0ea 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -167,6 +167,7 @@ ${DORIS_TEST_BINARY_DIR}/util/frame_of_reference_coding_test ${DORIS_TEST_BINARY_DIR}/util/zip_util_test ${DORIS_TEST_BINARY_DIR}/util/utf8_check_test ${DORIS_TEST_BINARY_DIR}/util/cgroup_util_test +${DORIS_TEST_BINARY_DIR}/util/path_util_test # Running common Unittest ${DORIS_TEST_BINARY_DIR}/common/resource_tls_test