[feature](fs) add fs benchmark tool framework (#20770)

Add an optional executable, fs_benchmark_tool, for testing the performance of file systems such as HDFS and S3.
Usage:

./fs_benchmark_tool --conf my.conf --fs_type=s3 --operation=read --iterations=5
In my.conf, you can add any configuration key-value pairs in the following format:

key1=value1
key2=value2
By default, this binary will not be built. Only build it when setting BUILD_FS_BENCHMARK=ON.
The binary will be installed in output/be/lib.

Developers can add a new subclass of BaseBenchmark to implement their own benchmark.
See be/src/io/fs/benchmark/s3_benchmark.hpp for an example.
This commit is contained in:
Mingyu Chen
2023-06-14 17:50:06 +08:00
committed by GitHub
parent a0d4f11667
commit 615778924e
7 changed files with 417 additions and 31 deletions

View File

@ -120,8 +120,12 @@ message(STATUS "THIRDPARTY_DIR is ${THIRDPARTY_DIR}")
# Build switches. Each option() below is a boolean cache entry that can be
# overridden on the command line with -D<NAME>=ON|OFF.
option(MAKE_TEST "ON for make unit test or OFF for not" OFF)
message(STATUS "make test: ${MAKE_TEST}")
option(WITH_MYSQL "Support access MySQL" ON)
# Opt-in switch for the fs_benchmark_tool executable; it defaults to OFF, so
# the tool is not built unless explicitly requested.
option(BUILD_FS_BENCHMARK "ON for building fs benchmark tool or OFF for not" OFF)
message(STATUS "build fs benchmark tool: ${BUILD_FS_BENCHMARK}")
# Do not embed any RPATH into the produced binaries.
set(CMAKE_SKIP_RPATH TRUE)
# Ask FindBoost for static Boost libraries linked against the static runtime.
set(Boost_USE_STATIC_LIBS ON)
set(Boost_USE_STATIC_RUNTIME ON)

View File

@ -26,3 +26,29 @@ file(GLOB_RECURSE IO_FILES CONFIGURE_DEPENDS *.cpp)
# IO_FILES is collected by a recursive *.cpp glob, which also matches the
# benchmark tool's entry point. Keep that translation unit (it defines main()
# and needs the google-benchmark headers) out of the IO static library; it is
# only compiled as part of the fs_benchmark_tool executable below.
# REMOVE_ITEM is a no-op when the path is not in the list.
list(REMOVE_ITEM IO_FILES "${CMAKE_CURRENT_SOURCE_DIR}/fs/benchmark/fs_benchmark_tool.cpp")

add_library(IO STATIC ${IO_FILES})

pch_reuse(IO)

# Optional file-system benchmark tool. BUILD_FS_BENCHMARK is a boolean
# option(); test it as a boolean so that ON/1/TRUE/YES are all accepted and an
# empty value does not turn the condition into a syntax error.
if (BUILD_FS_BENCHMARK)
    add_executable(fs_benchmark_tool
        fs/benchmark/fs_benchmark_tool.cpp
    )

    pch_reuse(fs_benchmark_tool)

    # This permits libraries loaded by dlopen to link to the symbols in the program.
    set_target_properties(fs_benchmark_tool PROPERTIES ENABLE_EXPORTS 1)

    target_link_libraries(fs_benchmark_tool
        ${DORIS_LINK_LIBS}
        benchmark
    )

    # Make sure the destination directory exists, then install the binary into it.
    install(DIRECTORY DESTINATION ${OUTPUT_DIR}/lib/)
    install(TARGETS fs_benchmark_tool DESTINATION ${OUTPUT_DIR}/lib/)

    # After linking: split the debug info into <binary>.dbg, strip the binary,
    # and record a debuglink so debuggers can locate the separated symbols.
    # VERBATIM keeps argument escaping identical across generators/platforms.
    add_custom_command(TARGET fs_benchmark_tool POST_BUILD
        COMMAND ${CMAKE_OBJCOPY} --only-keep-debug $<TARGET_FILE:fs_benchmark_tool> $<TARGET_FILE:fs_benchmark_tool>.dbg
        COMMAND ${CMAKE_STRIP} --strip-debug --strip-unneeded $<TARGET_FILE:fs_benchmark_tool>
        COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=$<TARGET_FILE:fs_benchmark_tool>.dbg $<TARGET_FILE:fs_benchmark_tool>
        VERBATIM
    )
endif()

View File

@ -0,0 +1,87 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <benchmark/benchmark.h>
#include <fmt/format.h>
#include <chrono>
#include <ctime>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "common/status.h"
namespace doris::io {
// Writes one log line to stdout in the form "[YYYY-MM-DD HH:MM:SS] <message>".
// `fmt` is a fmt-style format string and `args` are its arguments; the
// timestamp is the current wall-clock time rendered in the local time zone.
template <typename... Args>
void bm_log(const std::string& fmt, Args&&... args) {
    const std::time_t epoch_secs =
            std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
    // "YYYY-MM-DD HH:MM:SS" is 19 characters plus the terminating NUL.
    char stamp[20];
    std::strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", std::localtime(&epoch_secs));
    std::cout << "[" << stamp << "] " << fmt::format(fmt, std::forward<Args>(args)...)
              << std::endl;
}
class BaseBenchmark {
public:
BaseBenchmark(const std::string& name, int iterations,
const std::map<std::string, std::string>& conf_map)
: _name(name), _iterations(iterations), _conf_map(conf_map) {}
virtual ~BaseBenchmark() = default;
virtual Status init() { return Status::OK(); }
virtual Status run() { return Status::OK(); }
void register_bm() {
auto bm = benchmark::RegisterBenchmark(_name.c_str(), [&](benchmark::State& state) {
// first turn will use more time
Status st;
st = this->init();
if (!st) {
std::cerr << "failed to init. bm: " << _name << ", err: " << st;
return;
}
st = this->run();
if (!st) {
std::cerr << "failed to run at first time. bm: " << _name << ", err: " << st;
return;
}
for (auto _ : state) {
state.PauseTiming();
this->init();
state.ResumeTiming();
this->run();
}
});
if (_iterations != 0) {
bm->Iterations(_iterations);
}
bm->Unit(benchmark::kMillisecond);
}
protected:
std::string _name;
int _iterations;
std::map<std::string, std::string> _conf_map;
};
} // namespace doris::io

View File

@ -0,0 +1,83 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <iostream>
#include <string>
#include <vector>
#include "io/fs/benchmark/s3_benchmark.hpp"
namespace doris::io {
// Creates the benchmark object that matches a (file system, operation) pair.
class BenchmarkFactory {
public:
    // On success stores a newly allocated benchmark in `*bm` (ownership passes
    // to the caller) and returns OK. For any unsupported combination `*bm` is
    // set to nullptr and an INVALID_ARGUMENT error is returned.
    static Status getBm(const std::string fs_type, const std::string op_type, int64_t iterations,
                        const std::map<std::string, std::string>& conf_map, BaseBenchmark** bm);
};

// Defined `inline` because this definition lives in a header; without it every
// translation unit including the header would emit its own external definition.
inline Status BenchmarkFactory::getBm(const std::string fs_type, const std::string op_type,
                                      int64_t iterations,
                                      const std::map<std::string, std::string>& conf_map,
                                      BaseBenchmark** bm) {
    // Never leave the out-parameter indeterminate, whatever path is taken.
    *bm = nullptr;
    if (fs_type == "s3" && op_type == "read") {
        *bm = new S3ReadBenchmark(iterations, conf_map);
        return Status::OK();
    }
    // Unknown file system *or* unknown operation: report it instead of
    // returning OK without having produced a benchmark.
    return Status::Error<ErrorCode::INVALID_ARGUMENT>(
            "unknown params: fs_type: {}, op_type: {}, iterations: {}", fs_type, op_type,
            iterations);
}
// Owns the benchmark objects created for one tool invocation and registers
// them with Google Benchmark.
class MultiBenchmark {
public:
    // type:       file system type (e.g. "s3").
    // operation:  operation to benchmark (e.g. "read").
    // iterations: iteration count handed to each benchmark.
    // conf_map:   key/value configuration handed to each benchmark.
    MultiBenchmark(const std::string& type, const std::string& operation, int64_t iterations,
                   const std::map<std::string, std::string>& conf_map)
            : _type(type), _operation(operation), _iterations(iterations), _conf_map(conf_map) {}

    // The object owns raw pointers that the destructor deletes; copying it
    // would lead to a double delete.
    MultiBenchmark(const MultiBenchmark&) = delete;
    MultiBenchmark& operator=(const MultiBenchmark&) = delete;

    ~MultiBenchmark() {
        for (auto bm : benchmarks) {
            delete bm;
        }
    }

    // Hook for global environment setup; currently nothing to do.
    Status init_env() { return Status::OK(); }

    // Creates the benchmark for the configured type/operation, registers it
    // and takes ownership of it.
    Status init_bms() {
        BaseBenchmark* bm = nullptr;
        Status st = BenchmarkFactory::getBm(_type, _operation, _iterations, _conf_map, &bm);
        if (!st) {
            return st;
        }
        // Guard against a factory that reports success without producing an
        // object, so that an unsupported combination cannot be dereferenced.
        if (bm == nullptr) {
            return Status::Error<ErrorCode::INVALID_ARGUMENT>(
                    "no benchmark available for fs_type: {}, op_type: {}", _type, _operation);
        }
        // Take ownership first so the object is released by the destructor
        // even if registration throws.
        benchmarks.emplace_back(bm);
        bm->register_bm();
        return Status::OK();
    }

private:
    std::vector<BaseBenchmark*> benchmarks;
    std::string _type;
    std::string _operation;
    int64_t _iterations;
    std::map<std::string, std::string> _conf_map;
};
} // namespace doris::io

View File

@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gflags/gflags.h>
#include <fstream>
#include "io/fs/benchmark/benchmark_factory.hpp"
// Command line flags of the tool (gflags). Every flag is declared as a string;
// main() converts --iterations to an integer with std::stoi.
// NOTE(review): the help texts list more file systems and operations than the
// benchmark factory in this change appears to implement (only s3/read) --
// confirm before relying on the defaults.
DEFINE_string(fs_type, "hdfs", "Supported File System: s3, hdfs, local");
DEFINE_string(operation, "read", "Supported Operations: read, write, open, size, list, connect");
DEFINE_string(iterations, "10", "Number of runs");
// Path of the key=value configuration file consumed by read_conf().
DEFINE_string(conf, "", "config file");
// Builds the usage/help text shown by gflags. `progname` is inserted wherever
// the text refers to the executable itself.
std::string get_usage(const std::string& progname) {
    std::string text;
    text += progname + " is the Doris BE benchmark tool for testing file system.\n";
    text += "Usage:\n";
    text += progname + " --fs_type=[fs_type] --operation=[op_type] --iterations=10\n";
    // Supported values, one section per flag.
    text += "\nfs_type:\n"
            " hdfs\n"
            " s3\n";
    text += "\nop_type:\n"
            " read\n"
            " write\n";
    text += "\niterations:\n"
            " num of run\n";
    text += "\nExample:\n";
    text += progname + " --conf my.conf --fs_type=s3 --operation=read --iterations=100\n";
    return text;
}
// Reads a "key=value" configuration file into `*conf_map`.
//
// - Blank lines and lines whose first non-blank character is '#' are skipped.
// - Whitespace around keys and values (including the '\r' of CRLF line
//   endings) is removed, so "key = value" and "key=value" are equivalent.
// - A line without '=' or with an empty key is reported and stops parsing.
//
// The entries read so far are echoed to stdout. Returns 0 on success and 1 if
// the file cannot be opened or contains an invalid line.
int read_conf(const std::string& conf, std::map<std::string, std::string>* conf_map) {
    std::ifstream fin(conf);
    if (!fin.is_open()) {
        std::cout << "failed to open conf file: " << conf << std::endl;
        return 1;
    }

    // Strips leading/trailing blanks, tabs and line-ending characters.
    auto trim = [](const std::string& s) -> std::string {
        const char* blanks = " \t\r\n";
        const size_t first = s.find_first_not_of(blanks);
        if (first == std::string::npos) {
            return std::string();
        }
        const size_t last = s.find_last_not_of(blanks);
        return s.substr(first, last - first + 1);
    };

    bool ok = true;
    std::string raw;
    while (std::getline(fin, raw)) {
        const std::string line = trim(raw);
        if (line.empty() || line[0] == '#') {
            // skip empty line and line starts with #
            continue;
        }
        const size_t pos = line.find('=');
        const std::string key = (pos == std::string::npos) ? std::string()
                                                           : trim(line.substr(0, pos));
        if (key.empty()) {
            // Either there is no '=' at all or nothing precedes it.
            std::cout << "invalid config item: " << line << std::endl;
            ok = false;
            break;
        }
        (*conf_map)[key] = trim(line.substr(pos + 1));
    }
    fin.close();

    std::cout << "read config from file \"" << conf << "\":\n";
    for (auto it = conf_map->begin(); it != conf_map->end(); it++) {
        std::cout << it->first << " = " << it->second << std::endl;
    }
    return ok ? 0 : 1;
}
int main(int argc, char** argv) {
std::string usage = get_usage(argv[0]);
gflags::SetUsageMessage(usage);
google::ParseCommandLineFlags(&argc, &argv, true);
std::string conf_file = FLAGS_conf;
std::map<std::string, std::string> conf_map;
int res = read_conf(conf_file, &conf_map);
if (res != 0) {
std::cout << "failed to read conf from file \"conf_file\"" << std::endl;
return 1;
}
try {
doris::io::MultiBenchmark multi_bm(FLAGS_fs_type, FLAGS_operation,
std::stoi(FLAGS_iterations), conf_map);
doris::Status st = multi_bm.init_env();
if (!st) {
std::cout << "init env failed: " << st << std::endl;
return 1;
}
st = multi_bm.init_bms();
if (!st) {
std::cout << "init bms failed: " << st << std::endl;
return 1;
}
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
benchmark::Shutdown();
} catch (std::invalid_argument const& ex) {
std::cout << "std::invalid_argument::what(): " << ex.what() << std::endl;
return 1;
} catch (std::out_of_range const& ex) {
std::cout << "std::out_of_range::what(): " << ex.what() << std::endl;
return 1;
}
return 0;
}

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "io/file_factory.h"
#include "io/fs/benchmark/base_benchmark.h"
#include "io/fs/s3_file_reader.h"
#include "io/fs/s3_file_system.h"
#include "util/slice.h"
namespace doris::io {
// Benchmark that reads the first bytes of one S3 object.
//
// Configuration (conf_map): the key "file" holds the path of the object to
// read; the whole map is also handed to FileFactory::create_s3_reader().
class S3ReadBenchmark : public BaseBenchmark {
public:
    S3ReadBenchmark(int iterations, const std::map<std::string, std::string>& conf_map)
            : BaseBenchmark("S3ReadBenchmark", iterations, conf_map),
              // `buffer` is declared before `_result`, so its address is valid here.
              _result(buffer, sizeof(buffer)) {}
    virtual ~S3ReadBenchmark() = default;

    // Creates the file system and reader for the configured object.
    // Returns INVALID_ARGUMENT when the "file" key is missing or empty, or the
    // error reported by create_s3_reader().
    Status init() override {
        bm_log("begin to init {}", _name);
        // Look the key up without operator[], which would silently insert an
        // empty entry and carry on with an empty path.
        auto file_it = _conf_map.find("file");
        if (file_it == _conf_map.end() || file_it->second.empty()) {
            return Status::Error<ErrorCode::INVALID_ARGUMENT>(
                    "missing required config \"file\" for {}", _name);
        }
        std::string file_path = file_it->second;
        io::FileReaderOptions reader_opts = FileFactory::get_reader_options(nullptr);
        RETURN_IF_ERROR(
                FileFactory::create_s3_reader(_conf_map, file_path, &_fs, &_reader, reader_opts));
        bm_log("finish to init {}", _name);
        return Status::OK();
    }

    // Reads into the fixed-size buffer starting at offset 0 of the object.
    Status run() override { return _reader->read_at(0, _result, &_bytes_read); }

private:
    doris::S3Conf _s3_conf; // NOTE(review): not referenced anywhere in this class
    std::shared_ptr<io::FileSystem> _fs;
    io::FileReaderSPtr _reader;
    char buffer[128];        // destination of every read
    doris::Slice _result;    // view over `buffer`, passed to read_at()
    size_t _bytes_read = 0;  // bytes reported by the last read_at()
};
} // namespace doris::io

View File

@ -343,8 +343,12 @@ if [[ -z "${OUTPUT_BE_BINARY}" ]]; then
OUTPUT_BE_BINARY=${BUILD_BE}
fi
if [[ -z "${BUILD_BE_JAVA_EXTENSIONS}" ]]; then
BUILD_BE_JAVA_EXTENSIONS='OFF'
if [[ -n "${DISABLE_BE_JAVA_EXTENSIONS}" ]]; then
if [[ "${DISABLE_BE_JAVA_EXTENSIONS}" == "ON" ]]; then
BUILD_BE_JAVA_EXTENSIONS=0
else
BUILD_BE_JAVA_EXTENSIONS=1
fi
fi
if [[ -z "${DISABLE_JAVA_CHECK_STYLE}" ]]; then
@ -368,41 +372,37 @@ if [[ "${BUILD_BE_JAVA_EXTENSIONS}" -eq 1 && "$(uname -s)" == 'Darwin' ]]; then
fi
if [[ -n "${CAUSE}" ]]; then
echo -e "\033[33;1mWARNNING: \033[37;1mSkip building with Java UDF due to ${CAUSE}.\033[0m"
echo -e "\033[33;1mWARNNING: \033[37;1mSkip building with BE Java extensions due to ${CAUSE}.\033[0m"
BUILD_BE_JAVA_EXTENSIONS=0
BUILD_BE_JAVA_EXTENSIONS_IN_CONF=1
fi
fi
if [[ "${BUILD_BE_JAVA_EXTENSIONS}" == "ON" ]]; then
BUILD_BE_JAVA_EXTENSIONS=0
fi
echo "Get params:
BUILD_FE -- ${BUILD_FE}
BUILD_BE -- ${BUILD_BE}
BUILD_BROKER -- ${BUILD_BROKER}
BUILD_AUDIT -- ${BUILD_AUDIT}
BUILD_META_TOOL -- ${BUILD_META_TOOL}
BUILD_SPARK_DPP -- ${BUILD_SPARK_DPP}
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
WITH_MYSQL -- ${WITH_MYSQL}
WITH_LZO -- ${WITH_LZO}
GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY}
USE_AVX2 -- ${USE_AVX2}
USE_LIBCPP -- ${USE_LIBCPP}
USE_DWARF -- ${USE_DWARF}
STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO}
USE_MEM_TRACKER -- ${USE_MEM_TRACKER}
USE_JEMALLOC -- ${USE_JEMALLOC}
USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER}
ENABLE_STACKTRACE -- ${ENABLE_STACKTRACE}
DENABLE_CLANG_COVERAGE -- ${DENABLE_CLANG_COVERAGE}
DISPLAY_BUILD_TIME -- ${DISPLAY_BUILD_TIME}
ENABLE_PCH -- ${ENABLE_PCH}
BUILD_FE -- ${BUILD_FE}
BUILD_BE -- ${BUILD_BE}
BUILD_BROKER -- ${BUILD_BROKER}
BUILD_AUDIT -- ${BUILD_AUDIT}
BUILD_META_TOOL -- ${BUILD_META_TOOL}
BUILD_SPARK_DPP -- ${BUILD_SPARK_DPP}
BUILD_BE_JAVA_EXTENSIONS -- ${BUILD_BE_JAVA_EXTENSIONS}
BUILD_HIVE_UDF -- ${BUILD_HIVE_UDF}
PARALLEL -- ${PARALLEL}
CLEAN -- ${CLEAN}
WITH_MYSQL -- ${WITH_MYSQL}
WITH_LZO -- ${WITH_LZO}
GLIBC_COMPATIBILITY -- ${GLIBC_COMPATIBILITY}
USE_AVX2 -- ${USE_AVX2}
USE_LIBCPP -- ${USE_LIBCPP}
USE_DWARF -- ${USE_DWARF}
STRIP_DEBUG_INFO -- ${STRIP_DEBUG_INFO}
USE_MEM_TRACKER -- ${USE_MEM_TRACKER}
USE_JEMALLOC -- ${USE_JEMALLOC}
USE_BTHREAD_SCANNER -- ${USE_BTHREAD_SCANNER}
ENABLE_STACKTRACE -- ${ENABLE_STACKTRACE}
DENABLE_CLANG_COVERAGE -- ${DENABLE_CLANG_COVERAGE}
DISPLAY_BUILD_TIME -- ${DISPLAY_BUILD_TIME}
ENABLE_PCH -- ${ENABLE_PCH}
"
# Clean and build generated code
@ -454,9 +454,15 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
clean_be
fi
MAKE_PROGRAM="$(command -v "${BUILD_SYSTEM}")"
# Default to not building the fs benchmark tool when BUILD_FS_BENCHMARK is unset or empty.
BUILD_FS_BENCHMARK="${BUILD_FS_BENCHMARK:-OFF}"
echo "-- Make program: ${MAKE_PROGRAM}"
echo "-- Use ccache: ${CMAKE_USE_CCACHE}"
echo "-- Extra cxx flags: ${EXTRA_CXX_FLAGS:-}"
echo "-- Build fs benchmark tool: ${BUILD_FS_BENCHMARK}"
mkdir -p "${CMAKE_BUILD_DIR}"
cd "${CMAKE_BUILD_DIR}"
@ -465,6 +471,7 @@ if [[ "${BUILD_BE}" -eq 1 ]]; then
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
-DMAKE_TEST=OFF \
-DBUILD_FS_BENCHMARK="${BUILD_FS_BENCHMARK}" \
${CMAKE_USE_CCACHE:+${CMAKE_USE_CCACHE}} \
-DWITH_MYSQL="${WITH_MYSQL}" \
-DWITH_LZO="${WITH_LZO}" \
@ -611,6 +618,9 @@ EOF
# See: https://stackoverflow.com/questions/67378106/mac-m1-cping-binary-over-another-results-in-crash
rm -f "${DORIS_OUTPUT}/be/lib/doris_be"
cp -r -p "${DORIS_HOME}/be/output/lib/doris_be" "${DORIS_OUTPUT}/be/lib"/
# fs_benchmark_tool only exists in the BE output when it was built, so copy it
# next to doris_be only if the file is present.
if [[ -f "${DORIS_HOME}/be/output/lib/fs_benchmark_tool" ]]; then
cp -r -p "${DORIS_HOME}/be/output/lib/fs_benchmark_tool" "${DORIS_OUTPUT}/be/lib"/
fi
# make a soft link palo_be point to doris_be, for forward compatibility
cd "${DORIS_OUTPUT}/be/lib"