diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 4da8a25b08..5804067da9 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -120,8 +120,12 @@ message(STATUS "THIRDPARTY_DIR is ${THIRDPARTY_DIR}") option(MAKE_TEST "ON for make unit test or OFF for not" OFF) message(STATUS "make test: ${MAKE_TEST}") + option(WITH_MYSQL "Support access MySQL" ON) +option(BUILD_FS_BENCHMARK "ON for building fs benchmark tool or OFF for not" OFF) +message(STATUS "build fs benchmark tool: ${BUILD_FS_BENCHMARK}") + set(CMAKE_SKIP_RPATH TRUE) set(Boost_USE_STATIC_LIBS ON) set(Boost_USE_STATIC_RUNTIME ON) diff --git a/be/src/io/CMakeLists.txt b/be/src/io/CMakeLists.txt index cb964e17dc..30aef9c44a 100644 --- a/be/src/io/CMakeLists.txt +++ b/be/src/io/CMakeLists.txt @@ -26,3 +26,35 @@ file(GLOB_RECURSE IO_FILES CONFIGURE_DEPENDS *.cpp) add_library(IO STATIC ${IO_FILES}) pch_reuse(IO)
+
+# Optional stand-alone file system benchmark tool. It is only configured when
+# BUILD_FS_BENCHMARK holds a true value (ON, 1, TRUE, YES, ...).
+if (BUILD_FS_BENCHMARK)
+    add_executable(fs_benchmark_tool
+        fs/benchmark/fs_benchmark_tool.cpp
+    )
+
+    pch_reuse(fs_benchmark_tool)
+
+    # This permits libraries loaded by dlopen to link to the symbols in the program.
+    set_target_properties(fs_benchmark_tool PROPERTIES ENABLE_EXPORTS 1)
+
+    target_link_libraries(fs_benchmark_tool
+        ${DORIS_LINK_LIBS}
+        benchmark
+    )
+
+    install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/)
+    install(TARGETS fs_benchmark_tool DESTINATION ${OUTPUT_DIR}/lib/)
+
+    # Copy the debug info into <binary>.dbg, strip the binary, then record the
+    # .dbg file in the binary through a GNU debuglink.
+    # NOTE(review): the reviewed patch showed a bare `$` where the target file
+    # generator expressions belong; they are restored here -- please confirm.
+    add_custom_command(TARGET fs_benchmark_tool POST_BUILD
+        COMMAND ${CMAKE_OBJCOPY} --only-keep-debug $<TARGET_FILE:fs_benchmark_tool> $<TARGET_FILE:fs_benchmark_tool>.dbg
+        COMMAND ${CMAKE_STRIP} --strip-debug --strip-unneeded $<TARGET_FILE:fs_benchmark_tool>
+        COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=$<TARGET_FILE:fs_benchmark_tool>.dbg $<TARGET_FILE:fs_benchmark_tool>
+    )
+
+endif()
diff --git a/be/src/io/fs/benchmark/base_benchmark.h b/be/src/io/fs/benchmark/base_benchmark.h new file mode 100644 index 0000000000..bb1c00233f --- /dev/null +++ b/be/src/io/fs/benchmark/base_benchmark.h @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+#pragma once
+
+// NOTE(review): the include targets and template parameter lists were not
+// legible in the reviewed patch; they are reconstructed from what the code
+// below uses -- please confirm against the real file.
+#include <benchmark/benchmark.h>
+#include <fmt/format.h>
+
+#include <chrono>
+#include <ctime>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "common/status.h"
+
+namespace doris::io {
+
+// Writes "[YYYY-mm-dd HH:MM:SS] <message>" followed by a newline to stdout.
+// <message> is `fmt` rendered by fmt::format with `args`; the timestamp is the
+// current local time.
+template <typename... Args>
+void bm_log(const std::string& fmt, Args&&... args) {
+    auto now = std::chrono::system_clock::now();
+    std::time_t now_time = std::chrono::system_clock::to_time_t(now);
+    std::tm* local_time = std::localtime(&now_time);
+    // 19 characters of "%Y-%m-%d %H:%M:%S" plus the terminating NUL.
+    char time_str[20];
+    std::strftime(time_str, sizeof(time_str), "%Y-%m-%d %H:%M:%S", local_time);
+    std::cout << "[" << time_str << "] " << fmt::format(fmt, std::forward<Args>(args)...)
+              << std::endl;
+}
+
+// Base class of one file system benchmark case.
+//
+// Subclasses override init() (untimed preparation) and run() (the measured
+// operation). register_bm() hands the case to Google Benchmark.
+class BaseBenchmark {
+public:
+    // name:       name the case is registered under.
+    // iterations: fixed iteration count; 0 lets Google Benchmark choose.
+    // conf_map:   key/value settings, copied into the object.
+    BaseBenchmark(const std::string& name, int iterations,
+                  const std::map<std::string, std::string>& conf_map)
+            : _name(name), _iterations(iterations), _conf_map(conf_map) {}
+    virtual ~BaseBenchmark() = default;
+
+    // Prepares one run. Called with the timer paused. The default does nothing.
+    virtual Status init() { return Status::OK(); }
+    // The measured operation. The default does nothing.
+    virtual Status run() { return Status::OK(); }
+
+    // Registers this case with Google Benchmark, reporting in milliseconds.
+    //
+    // The registered callback keeps `this`, so the object has to stay alive
+    // until the benchmarks have been run.
+    //
+    // Any failing init()/run() marks the case as failed via SkipWithError()
+    // instead of returning silently or timing a failed operation.
+    void register_bm() {
+        auto bm = benchmark::RegisterBenchmark(_name.c_str(), [this](benchmark::State& state) {
+            // first turn will use more time, so do one untimed warm-up round
+            Status st = this->init();
+            if (!st) {
+                _report_error(state, "failed to init", st);
+                return;
+            }
+            st = this->run();
+            if (!st) {
+                _report_error(state, "failed to run at first time", st);
+                return;
+            }
+            for (auto _ : state) {
+                state.PauseTiming();
+                st = this->init();
+                state.ResumeTiming();
+                if (!st) {
+                    _report_error(state, "failed to init", st);
+                    // A ranged-for benchmark loop must be left explicitly after
+                    // SkipWithError().
+                    break;
+                }
+                st = this->run();
+                if (!st) {
+                    _report_error(state, "failed to run", st);
+                    break;
+                }
+            }
+        });
+        if (_iterations != 0) {
+            bm->Iterations(_iterations);
+        }
+        bm->Unit(benchmark::kMillisecond);
+    }
+
+protected:
+    std::string _name;
+    int _iterations;
+    std::map<std::string, std::string> _conf_map;
+
+private:
+    // Prints "<what>. bm: <name>, err: <status>" to stderr and records the same
+    // text as the error of the running benchmark.
+    void _report_error(benchmark::State& state, const char* what, const Status& st) const {
+        std::ostringstream msg;
+        msg << what << ". bm: " << _name << ", err: " << st;
+        const std::string text = msg.str();
+        std::cerr << text << std::endl;
+        state.SkipWithError(text.c_str());
+    }
+};
+
+} // namespace doris::io
diff --git a/be/src/io/fs/benchmark/benchmark_factory.hpp b/be/src/io/fs/benchmark/benchmark_factory.hpp new file mode 100644 index 0000000000..3f48bd16ce --- /dev/null +++ b/be/src/io/fs/benchmark/benchmark_factory.hpp @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#pragma once
+
+// NOTE(review): the standard include targets were not legible in the reviewed
+// patch; they are reconstructed from what the code below uses.
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "io/fs/benchmark/s3_benchmark.hpp"
+
+namespace doris::io {
+
+// Creates benchmark cases from a file system type and an operation name.
+class BenchmarkFactory {
+public:
+    // Creates the case for (fs_type, op_type) and stores it in *bm; the caller
+    // takes ownership. Only fs_type "s3" with op_type "read" is implemented.
+    //
+    // *bm is always written: it is nullptr whenever a non-OK status is
+    // returned, and non-null whenever OK is returned.
+    static Status getBm(const std::string& fs_type, const std::string& op_type,
+                        int64_t iterations, const std::map<std::string, std::string>& conf_map,
+                        BaseBenchmark** bm);
+};
+
+// `inline` because this definition lives in a header.
+inline Status BenchmarkFactory::getBm(const std::string& fs_type, const std::string& op_type,
+                                      int64_t iterations,
+                                      const std::map<std::string, std::string>& conf_map,
+                                      BaseBenchmark** bm) {
+    *bm = nullptr;
+    if (fs_type == "s3" && op_type == "read") {
+        *bm = new S3ReadBenchmark(iterations, conf_map);
+        return Status::OK();
+    }
+    // Every unsupported combination is an error, including an unknown fs_type.
+    // NOTE(review): the error-code template argument was not legible in the
+    // reviewed patch; INVALID_ARGUMENT is assumed -- please confirm.
+    return Status::Error<ErrorCode::INVALID_ARGUMENT>(
+            "unknown params: fs_type: {}, op_type: {}, iterations: {}", fs_type, op_type,
+            iterations);
+}
+
+// Owns the benchmark cases selected on the command line and registers them with
+// Google Benchmark. The object has to outlive the benchmark run, because the
+// cases are destroyed together with it.
+class MultiBenchmark {
+public:
+    MultiBenchmark(const std::string& type, const std::string& operation, int64_t iterations,
+                   const std::map<std::string, std::string>& conf_map)
+            : _type(type), _operation(operation), _iterations(iterations), _conf_map(conf_map) {}
+
+    // Hook for environment setup; currently nothing needs to be prepared.
+    Status init_env() { return Status::OK(); }
+
+    // Creates the case for the configured type/operation, registers it and
+    // keeps it alive. Returns the factory's error if no case could be created.
+    Status init_bms() {
+        BaseBenchmark* bm = nullptr;
+        Status st = BenchmarkFactory::getBm(_type, _operation, _iterations, _conf_map, &bm);
+        // Take ownership right away so the case is released on every path.
+        std::unique_ptr<BaseBenchmark> owned(bm);
+        if (!st) {
+            return st;
+        }
+        owned->register_bm();
+        benchmarks.emplace_back(std::move(owned));
+        return Status::OK();
+    }
+
+private:
+    // unique_ptr makes the class move-only, so a copy can no longer
+    // double-delete the cases.
+    std::vector<std::unique_ptr<BaseBenchmark>> benchmarks;
+    std::string _type;
+    std::string _operation;
+    int64_t _iterations;
+    std::map<std::string, std::string> _conf_map;
+};
+
+} // namespace doris::io
diff --git a/be/src/io/fs/benchmark/fs_benchmark_tool.cpp b/be/src/io/fs/benchmark/fs_benchmark_tool.cpp new file mode 100644 index 0000000000..ad8772bb0f --- /dev/null +++ b/be/src/io/fs/benchmark/fs_benchmark_tool.cpp @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+// NOTE(review): the include targets were not legible in the reviewed patch;
+// they are reconstructed from what the code below uses.
+#include <gflags/gflags.h>
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "io/fs/benchmark/benchmark_factory.hpp"
+
+DEFINE_string(fs_type, "hdfs", "Supported File System: s3, hdfs, local");
+DEFINE_string(operation, "read", "Supported Operations: read, write, open, size, list, connect");
+DEFINE_string(iterations, "10", "Number of runs");
+DEFINE_string(conf, "", "config file");
+
+// Builds the --help text; `progname` is shown as the name of the executable.
+std::string get_usage(const std::string& progname) {
+    std::stringstream ss;
+    ss << progname << " is the Doris BE benchmark tool for testing file system.\n";
+
+    ss << "Usage:\n";
+    // --conf is listed because main() refuses to start without a readable file.
+    ss << progname
+       << " --conf=[conf_file] --fs_type=[fs_type] --operation=[op_type] --iterations=10\n";
+    ss << "\nfs_type:\n";
+    ss << "     hdfs\n";
+    ss << "     s3\n";
+    ss << "\nop_type:\n";
+    ss << "     read\n";
+    ss << "     write\n";
+    ss << "\niterations:\n";
+    ss << "     num of run\n";
+    ss << "\nExample:\n";
+    ss << progname << " --conf my.conf --fs_type=s3 --operation=read --iterations=100\n";
+    return ss.str();
+}
+
+// Reads "key=value" lines from the file `conf` into *conf_map.
+//
+// Empty lines and lines starting with '#' are skipped. The key is everything
+// before the first '=', the value everything after it; neither is trimmed.
+// Parsing stops at the first line without '='. The entries read so far are
+// echoed to stdout in either case.
+//
+// Returns 0 on success, 1 if the file cannot be opened or a line is invalid.
+int read_conf(const std::string& conf, std::map<std::string, std::string>* conf_map) {
+    std::ifstream fin(conf);
+    if (!fin.is_open()) {
+        std::cout << "failed to open conf file: " << conf << std::endl;
+        return 1;
+    }
+
+    bool ok = true;
+    std::string line;
+    while (std::getline(fin, line)) {
+        // Tolerate CRLF files: otherwise every value would end with '\r'.
+        if (!line.empty() && line.back() == '\r') {
+            line.pop_back();
+        }
+        if (line.empty() || line.rfind("#", 0) == 0) {
+            // skip empty line and line starts with #
+            continue;
+        }
+        const size_t pos = line.find('=');
+        if (pos == std::string::npos) {
+            std::cout << "invalid config item: " << line << std::endl;
+            ok = false;
+            break;
+        }
+        (*conf_map)[line.substr(0, pos)] = line.substr(pos + 1);
+    }
+
+    std::cout << "read config from file \"" << conf << "\":\n";
+    for (const auto& [key, val] : *conf_map) {
+        std::cout << key << " = " << val << std::endl;
+    }
+    return ok ? 0 : 1;
+}
+
+// Parses the flags, loads the config file, registers the selected benchmark
+// and runs it. Returns 0 on success and 1 on any setup failure.
+int main(int argc, char** argv) {
+    std::string usage = get_usage(argv[0]);
+    gflags::SetUsageMessage(usage);
+    google::ParseCommandLineFlags(&argc, &argv, true);
+
+    std::string conf_file = FLAGS_conf;
+    std::map<std::string, std::string> conf_map;
+    int res = read_conf(conf_file, &conf_map);
+    if (res != 0) {
+        // Print the actual path, not the name of the variable holding it.
+        std::cout << "failed to read conf from file \"" << conf_file << "\"" << std::endl;
+        return 1;
+    }
+
+    try {
+        // std::stoi throws std::invalid_argument / std::out_of_range, which the
+        // handlers below turn into an error message.
+        const int iterations = std::stoi(FLAGS_iterations);
+        if (iterations < 0) {
+            std::cout << "iterations must not be negative: " << iterations << std::endl;
+            return 1;
+        }
+
+        doris::io::MultiBenchmark multi_bm(FLAGS_fs_type, FLAGS_operation, iterations, conf_map);
+        doris::Status st = multi_bm.init_env();
+        if (!st) {
+            std::cout << "init env failed: " << st << std::endl;
+            return 1;
+        }
+        st = multi_bm.init_bms();
+        if (!st) {
+            std::cout << "init bms failed: " << st << std::endl;
+            return 1;
+        }
+
+        // multi_bm stays in scope until the run has finished.
+        benchmark::Initialize(&argc, argv);
+        benchmark::RunSpecifiedBenchmarks();
+        benchmark::Shutdown();
+
+    } catch (std::invalid_argument const& ex) {
+        std::cout << "std::invalid_argument::what(): " << ex.what() << std::endl;
+        return 1;
+    } catch (std::out_of_range const& ex) {
+        std::cout << "std::out_of_range::what(): " << ex.what() << std::endl;
+        return 1;
+    }
+    return 0;
+}
diff --git a/be/src/io/fs/benchmark/s3_benchmark.hpp b/be/src/io/fs/benchmark/s3_benchmark.hpp new file mode 100644 index 0000000000..5b9a81aaec --- /dev/null +++ b/be/src/io/fs/benchmark/s3_benchmark.hpp @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "io/file_factory.h"
+#include "io/fs/benchmark/base_benchmark.h"
+#include "io/fs/s3_file_reader.h"
+#include "io/fs/s3_file_system.h"
+#include "util/slice.h"
+
+namespace doris::io {
+
+// Benchmark case that opens the file named by the "file" config entry through
+// FileFactory::create_s3_reader() and reads from offset 0 into a fixed buffer.
+class S3ReadBenchmark : public BaseBenchmark {
+public:
+    // NOTE(review): the template arguments in this class were not legible in
+    // the reviewed patch and are reconstructed -- please confirm.
+    S3ReadBenchmark(int iterations, const std::map<std::string, std::string>& conf_map)
+            : BaseBenchmark("S3ReadBenchmark", iterations, conf_map),
+              _result(buffer, sizeof(buffer)) {}
+    ~S3ReadBenchmark() override = default;
+
+    // Creates the file system and reader for the configured file.
+    Status init() override {
+        bm_log("begin to init {}", _name);
+        // operator[] yields an empty path (and inserts the key) if "file" is
+        // not configured.
+        std::string file_path = _conf_map["file"];
+        io::FileReaderOptions reader_opts = FileFactory::get_reader_options(nullptr);
+        RETURN_IF_ERROR(
+                FileFactory::create_s3_reader(_conf_map, file_path, &_fs, &_reader, reader_opts));
+        bm_log("finish to init {}", _name);
+        return Status::OK();
+    }
+
+    // One read at offset 0 into _result; the byte count goes to _bytes_read.
+    Status run() override { return _reader->read_at(0, _result, &_bytes_read); }
+
+private:
+    // NOTE(review): element type assumed from the create_s3_reader() call -- confirm.
+    std::shared_ptr<io::FileSystem> _fs;
+    io::FileReaderSPtr _reader;
+    // `buffer` must stay declared before `_result`: members are initialized in
+    // declaration order and `_result` is constructed from `buffer`.
+    char buffer[128];
+    doris::Slice _result;
+    size_t _bytes_read = 0;
+};
+
+} // namespace doris::io
diff --git a/build.sh b/build.sh index 929343e3a4..44f69fc88f 100755 --- a/build.sh +++ b/build.sh @@ -343,8 +343,12 @@ if [[ -z "${OUTPUT_BE_BINARY}" ]]; then OUTPUT_BE_BINARY=${BUILD_BE} fi -if [[ -z "${BUILD_BE_JAVA_EXTENSIONS}" ]]; then - BUILD_BE_JAVA_EXTENSIONS='OFF' +if [[ -n "${DISABLE_BE_JAVA_EXTENSIONS}" 
]]; then + if [[ "${DISABLE_BE_JAVA_EXTENSIONS}" == "ON" ]]; then + BUILD_BE_JAVA_EXTENSIONS=0 + else + BUILD_BE_JAVA_EXTENSIONS=1 + fi fi if [[ -z "${DISABLE_JAVA_CHECK_STYLE}" ]]; then @@ -368,41 +372,37 @@ if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 && "$(uname -s)" == 'Darwin' ]]; then fi if [[ -n "${CAUSE}" ]]; then - echo -e "\033[33;1mWARNNING: \033[37;1mSkip building with Java UDF due to ${CAUSE}.\033[0m" + echo -e "\033[33;1mWARNNING: \033[37;1mSkip building with BE Java extensions due to ${CAUSE}.\033[0m" BUILD_BE_JAVA_EXTENSIONS=0 BUILD_BE_JAVA_EXTENSIONS_IN_CONF=1 fi fi -if [[ "${BUILD_BE_JAVA_EXTENSIONS}" == "ON" ]]; then - BUILD_BE_JAVA_EXTENSIONS=0 -fi - echo "Get params: - BUILD_FE -- ${BUILD_FE} - BUILD_BE -- ${BUILD_BE} - BUILD_BROKER -- ${BUILD_BROKER} - BUILD_AUDIT -- ${BUILD_AUDIT} - BUILD_META_TOOL -- ${BUILD_META_TOOL} - BUILD_SPARK_DPP -- ${BUILD_SPARK_DPP} - BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS} - BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF} - PARALLEL -- ${PARALLEL} - CLEAN -- ${CLEAN} - WITH_MYSQL -- ${WITH_MYSQL} - WITH_LZO -- ${WITH_LZO} - GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY} - USE_AVX2 -- ${USE_AVX2} - USE_LIBCPP -- ${USE_LIBCPP} - USE_DWARF -- ${USE_DWARF} - STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO} - USE_MEM_TRACKER -- ${USE_MEM_TRACKER} - USE_JEMALLOC -- ${USE_JEMALLOC} - USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER} - ENABLE_STACKTRACE -- ${ENABLE_STACKTRACE} - DENABLE_CLANG_COVERAGE -- ${DENABLE_CLANG_COVERAGE} - DISPLAY_BUILD_TIME -- ${DISPLAY_BUILD_TIME} - ENABLE_PCH -- ${ENABLE_PCH} + BUILD_FE -- ${BUILD_FE} + BUILD_BE -- ${BUILD_BE} + BUILD_BROKER -- ${BUILD_BROKER} + BUILD_AUDIT -- ${BUILD_AUDIT} + BUILD_META_TOOL -- ${BUILD_META_TOOL} + BUILD_SPARK_DPP -- ${BUILD_SPARK_DPP} + BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS} + BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF} + PARALLEL -- ${PARALLEL} + CLEAN -- ${CLEAN} + WITH_MYSQL -- ${WITH_MYSQL} + WITH_LZO -- ${WITH_LZO} + GLIBC_COMPATIBILITY -- 
${GLIBC_COMPATIBILITY} + USE_AVX2 -- ${USE_AVX2} + USE_LIBCPP -- ${USE_LIBCPP} + USE_DWARF -- ${USE_DWARF} + STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO} + USE_MEM_TRACKER -- ${USE_MEM_TRACKER} + USE_JEMALLOC -- ${USE_JEMALLOC} + USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER} + ENABLE_STACKTRACE -- ${ENABLE_STACKTRACE} + DENABLE_CLANG_COVERAGE -- ${DENABLE_CLANG_COVERAGE} + DISPLAY_BUILD_TIME -- ${DISPLAY_BUILD_TIME} + ENABLE_PCH -- ${ENABLE_PCH} " # Clean and build generated code @@ -454,9 +454,15 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then clean_be fi MAKE_PROGRAM="$(command -v "${BUILD_SYSTEM}")" + + if [[ -z "${BUILD_FS_BENCHMARK}" ]]; then + BUILD_FS_BENCHMARK=OFF + fi + echo "-- Make program: ${MAKE_PROGRAM}" echo "-- Use ccache: ${CMAKE_USE_CCACHE}" echo "-- Extra cxx flags: ${EXTRA_CXX_FLAGS:-}" + echo "-- Build fs benchmark tool: ${BUILD_FS_BENCHMARK}" mkdir -p "${CMAKE_BUILD_DIR}" cd "${CMAKE_BUILD_DIR}" @@ -465,6 +471,7 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \ -DMAKE_TEST=OFF \ + -DBUILD_FS_BENCHMARK="${BUILD_FS_BENCHMARK}" \ ${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \ -DWITH_MYSQL="${WITH_MYSQL}" \ -DWITH_LZO="${WITH_LZO}" \ @@ -611,6 +618,9 @@ EOF # See: https://stackoverflow.com/questions/67378106/mac-m1-cping-binary-over-another-results-in-crash rm -f "${DORIS_OUTPUT}/be/lib/doris_be" cp -r -p "${DORIS_HOME}/be/output/lib/doris_be" "${DORIS_OUTPUT}/be/lib"/ + if [[ -f "${DORIS_HOME}/be/output/lib/fs_benchmark_tool" ]]; then + cp -r -p "${DORIS_HOME}/be/output/lib/fs_benchmark_tool" "${DORIS_OUTPUT}/be/lib"/ + fi # make a soft link palo_be point to doris_be, for forward compatibility cd "${DORIS_OUTPUT}/be/lib"