From f32deb18e9832ccbd8193fc6822c2616faa2d58e Mon Sep 17 00:00:00 2001
From: airborne12
Date: Fri, 19 May 2023 08:25:51 +0800
Subject: [PATCH] [Update](build) change clucene from thirdparty to git module (#19352)

---
 .gitmodules                                   |  4 ++
 be/CMakeLists.txt                             | 60 ++++++++++++++-----
 be/src/clucene                                |  1 +
 .../segment_v2/inverted_index_reader.cpp      |  4 +-
 build.sh                                      | 41 ++++++++-----
 thirdparty/build-thirdparty.sh                | 46 --------------
 thirdparty/vars.sh                            |  7 ---
 7 files changed, 78 insertions(+), 85 deletions(-)
 create mode 160000 be/src/clucene

diff --git a/.gitmodules b/.gitmodules
index 06213cbb75..9fe51bfd1d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -29,3 +29,7 @@
 	path = be/src/apache-orc
 	url = https://github.com/apache/doris-thirdparty.git
 	branch = orc
+[submodule "be/src/clucene"]
+	path = be/src/clucene
+	url = https://github.com/apache/doris-thirdparty.git
+	branch = clucene
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 182837f1a0..67ad424da5 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -148,18 +148,6 @@ endif()
 set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
 
 # Set all libraries
-add_library(ic STATIC IMPORTED)
-set_target_properties(ic PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libic.a)
-
-add_library(clucene-core STATIC IMPORTED)
-set_target_properties(clucene-core PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-core-static.a)
-
-add_library(clucene-shared STATIC IMPORTED)
-set_target_properties(clucene-shared PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-shared-static.a)
-
-add_library(clucene-contribs-lib STATIC IMPORTED)
-set_target_properties(clucene-contribs-lib PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-contribs-lib.a)
-
 add_library(gflags STATIC IMPORTED)
 set_target_properties(gflags PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgflags.a)
 
@@ -449,6 +437,39 @@ SET(ZSTD_INCLUDE_DIR "$ENV{DORIS_THIRDPARTY}/installed/include/zstd")
 
 add_subdirectory(${SRC_DIR}/apache-orc EXCLUDE_FROM_ALL)
 target_compile_options(orc PRIVATE -Wno-implicit-fallthrough -w)
+# Options consumed by the CLucene subproject added below.
+set(BUILD_STATIC_LIBRARIES ON)
+set(BUILD_SHARED_LIBRARIES OFF)
+set(BUILD_CONTRIBS_LIB ON)
+set(BOOST_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(ZLIB_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(Roaring_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
+set(USE_STAT64 0)
+
+if (USE_BTHREAD_SCANNER)
+    set(USE_BTHREAD ON)
+else()
+    set(USE_BTHREAD OFF)
+endif()
+
+add_subdirectory(${SRC_DIR}/clucene EXCLUDE_FROM_ALL)
+
+# CLucene is vendored code: silence all of its warnings with -w. Only the
+# narrowing flag differs, because Clang and GCC spell it differently.
+if (COMPILER_CLANG)
+    set(CLUCENE_NARROWING_FLAG -Wno-c++11-narrowing)
+else ()
+    set(CLUCENE_NARROWING_FLAG -Wno-narrowing)
+endif()
+foreach (clucene_target clucene-core-static clucene-shared-static clucene-contribs-lib ic)
+    target_compile_options(${clucene_target} PRIVATE -fno-omit-frame-pointer ${CLUCENE_NARROWING_FLAG} -w)
+endforeach()
+
+# Install the jieba dictionaries from the CLucene sources into the BE output.
+install(DIRECTORY
+    ${SRC_DIR}/clucene/src/contribs-lib/CLucene/analysis/jieba/dict
+    DESTINATION ${OUTPUT_DIR})
+
 # Check if functions are supported in this platform. All flags will generated
 # in gensrc/build/common/env_config.h.
 # You can check funcion here which depends on platform. Don't forget add this
@@ -645,6 +666,13 @@ include_directories(
     ${CMAKE_CURRENT_BINARY_DIR}/src/apache-orc/c++/include
 )
 
+include_directories(
+    ${CMAKE_CURRENT_BINARY_DIR}/src/clucene/src/shared
+    ${SRC_DIR}/clucene/src/core
+    ${SRC_DIR}/clucene/src/shared
+    ${SRC_DIR}/clucene/src/contribs-lib
+)
+
 include_directories(
     ${SRC_DIR}/
     ${TEST_DIR}/
@@ -732,10 +760,6 @@ find_package(absl)
 # When adding new dependencies, If you don’t know if it can run on all platforms,
 # add it here first.
 set(COMMON_THIRDPARTY
-    ic
-    clucene-core
-    clucene-shared
-    clucene-contribs-lib
     backtrace
     rocksdb
     cyrus-sasl
@@ -858,6 +882,10 @@ if (WITH_MYSQL)
 endif()
 
 set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} orc)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ic)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-core-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-shared-static)
+set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} clucene-contribs-lib)
 set(DORIS_DEPENDENCIES ${DORIS_DEPENDENCIES} ${WL_END_GROUP})
 
 
diff --git a/be/src/clucene b/be/src/clucene
new file mode 160000
index 0000000000..76cd035119
--- /dev/null
+++ b/be/src/clucene
@@ -0,0 +1 @@
+Subproject commit 76cd03511903916ab076ab0ef6f3779ab4e7476e
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index ae7b34bad6..48b399e8eb 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -38,10 +38,10 @@
 #include
 #include
-#include
 #include
 #include
 #include
+#include
 #include "common/config.h"
@@ -687,7 +687,7 @@ void InvertedIndexVisitor::visit(std::vector& doc_id, std::vector
     visit(roaring::Roaring::read(doc_id.data(), false));
 }
 
-void InvertedIndexVisitor::visit(Roaring* doc_id, std::vector& packed_value) {
+void InvertedIndexVisitor::visit(roaring::Roaring* doc_id, std::vector& packed_value) {
     if (!matches(packed_value.data())) {
         return;
     }
diff --git a/build.sh b/build.sh
index 8b5373aab0..aa2f8fa4d8 100755
--- a/build.sh
+++ b/build.sh
@@ -252,19 +252,32 @@ if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
     fi
 fi
 
-echo "Update apache-orc ..."
-set +e
-cd "${DORIS_HOME}"
-echo "Update apache-orc submodule ..."
-git submodule update --init --recursive be/src/apache-orc
-exit_code=$?
-set -e
-if [[ "${exit_code}" -ne 0 ]]; then
-    echo "Update apache-orc submodule failed, start to download and extract apache-orc package ..."
-    rm -rf "${DORIS_HOME}/be/src/apache-orc"
-    mkdir -p "${DORIS_HOME}/be/src/apache-orc"
-    curl -L https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz | tar -xz -C "${DORIS_HOME}/be/src/apache-orc" --strip-components=1
-fi
+# Fetch one git submodule, falling back to a source archive when git fails.
+#   $1: submodule path, relative to DORIS_HOME
+#   $2: submodule name used in the log messages
+#   $3: URL of the .tar.gz archive extracted when `git submodule update` fails
+update_submodule() {
+    local submodule_path=$1
+    local submodule_name=$2
+    local archive_url=$3
+
+    set +e
+    cd "${DORIS_HOME}"
+    echo "Update ${submodule_name} submodule ..."
+    git submodule update --init --recursive "${submodule_path}"
+    exit_code=$?
+    set -e
+    if [[ "${exit_code}" -ne 0 ]]; then
+        echo "Update ${submodule_name} submodule failed, start to download and extract ${submodule_name} package ..."
+        # Remove any partial checkout so the archive is extracted into a clean directory.
+        rm -rf "${DORIS_HOME}/${submodule_path}"
+        mkdir -p "${DORIS_HOME}/${submodule_path}"
+        curl -L "${archive_url}" | tar -xz -C "${DORIS_HOME}/${submodule_path}" --strip-components=1
+    fi
+}
+
+update_submodule "be/src/apache-orc" "apache-orc" "https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
+update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
 
 if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && "${BUILD_SPARK_DPP}" -eq 0 ]]; then
     clean_gensrc
@@ -573,6 +586,7 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
 
     cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
     cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
+    cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
 
     if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
         cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/"
@@ -618,7 +632,6 @@ EOF
     copy_common_files "${DORIS_OUTPUT}/be/"
     mkdir -p "${DORIS_OUTPUT}/be/log"
     mkdir -p "${DORIS_OUTPUT}/be/storage"
-    cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
 fi
 
 if [[ "${BUILD_BROKER}" -eq 1 ]]; then
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index c3ef10319e..0e07b3a5b7 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -1571,51 +1571,6 @@ build_fast_float() {
     cp -r ./include/fast_float "${TP_INSTALL_DIR}/include/"
 }
 
-#clucene
-build_clucene() {
-    if [[ "$(uname -m)" == 'x86_64' ]]; then
-        USE_AVX2="${USE_AVX2:-1}"
-    else
-        USE_AVX2="${USE_AVX2:-0}"
-    fi
-    if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
-        USE_BTHREAD_SCANNER='OFF'
-    fi
-    if [[ ${USE_BTHREAD_SCANNER} == "ON" ]]; then
-        USE_BTHREAD=1
-    else
-        USE_BTHREAD=0
-    fi
-
-    check_if_source_exist "${CLUCENE_SOURCE}"
-    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
-
-    mkdir -p "${BUILD_DIR}"
-    cd "${BUILD_DIR}"
-    rm -rf CMakeCache.txt CMakeFiles/
-
-    ${CMAKE_CMD} -G "${GENERATOR}" \
-        -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" \
-        -DBUILD_STATIC_LIBRARIES=ON \
-        -DBUILD_SHARED_LIBRARIES=OFF \
-        -DBOOST_ROOT="${TP_INSTALL_DIR}" \
-        -DZLIB_ROOT="${TP_INSTALL_DIR}" \
-        -DCMAKE_CXX_FLAGS="-g -fno-omit-frame-pointer ${warning_narrowing}" \
-        -DUSE_STAT64=0 \
-        -DUSE_AVX2="${USE_AVX2}" \
-        -DUSE_BTHREAD="${USE_BTHREAD}" \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DBUILD_CONTRIBS_LIB=ON ..
-    ${BUILD_SYSTEM} -j "${PARALLEL}"
-    ${BUILD_SYSTEM} install
-
-    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
-    if [[ ! -d "${TP_INSTALL_DIR}"/share ]]; then
-        mkdir -p "${TP_INSTALL_DIR}"/share
-    fi
-    cp -rf src/contribs-lib/CLucene/analysis/jieba/dict "${TP_INSTALL_DIR}"/share/
-}
-
 # hadoop_libs_x86
 build_hadoop_libs_x86() {
     check_if_source_exist "${HADOOP_LIBS_X86_SOURCE}"
@@ -1685,7 +1640,6 @@ if [[ "${#packages[@]}" -eq 0 ]]; then
         xxhash
         concurrentqueue
         fast_float
-        clucene
     )
     if [[ "$(uname -s)" == 'Darwin' ]]; then
         read -r -a packages <<<"binutils gettext ${packages[*]}"
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 3ced85e07b..05846629c9 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -54,12 +54,6 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
 # of all thirdparties
 #####################################################
 
-#clucene
-CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.12.tar.gz"
-CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.12.tar.gz"
-CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.12"
-CLUCENE_MD5SUM="171035c1d4c9fe3d7307f04dd76ab3e3"
-
 # libevent
 LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz"
 LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
@@ -466,7 +460,6 @@ HADOOP_LIBS_X86_MD5SUM="96117450170487f007ffeca5ddf62f7e"
 
 # all thirdparties which need to be downloaded is set in array TP_ARCHIVES
 export TP_ARCHIVES=(
-    'CLUCENE'
     'LIBEVENT'
     'OPENSSL'
     'THRIFT'