[Update](build) change clucene from thirdparty to git module (#19352)

This commit is contained in:
airborne12
2023-05-19 08:25:51 +08:00
committed by GitHub
parent 3d6a13605d
commit f32deb18e9
7 changed files with 72 additions and 85 deletions

4
.gitmodules vendored
View File

@ -29,3 +29,7 @@
path = be/src/apache-orc
url = https://github.com/apache/doris-thirdparty.git
branch = orc
[submodule "be/src/clucene"]
path = be/src/clucene
url = https://github.com/apache/doris-thirdparty.git
branch = clucene

View File

@ -148,18 +148,6 @@ endif()
set(GPERFTOOLS_HOME "${THIRDPARTY_DIR}/gperftools")
# Declare the prebuilt third-party static archives as IMPORTED targets. Each
# target's IMPORTED_LOCATION points at an archive under ${THIRDPARTY_DIR}/lib.
add_library(ic STATIC IMPORTED)
set_target_properties(ic PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libic.a)
# CLucene archives: core, shared and contribs-lib.
add_library(clucene-core STATIC IMPORTED)
set_target_properties(clucene-core PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-core-static.a)
add_library(clucene-shared STATIC IMPORTED)
set_target_properties(clucene-shared PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-shared-static.a)
add_library(clucene-contribs-lib STATIC IMPORTED)
set_target_properties(clucene-contribs-lib PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libclucene-contribs-lib.a)
add_library(gflags STATIC IMPORTED)
set_target_properties(gflags PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libgflags.a)
@ -449,6 +437,39 @@ SET(ZSTD_INCLUDE_DIR "$ENV{DORIS_THIRDPARTY}/installed/include/zstd")
add_subdirectory(${SRC_DIR}/apache-orc EXCLUDE_FROM_ALL)
target_compile_options(orc PRIVATE -Wno-implicit-fallthrough -w)
# Build settings for the in-tree CLucene sources. They are set before
# add_subdirectory() so they are visible while that directory is configured.
# Static libraries only, with the contribs library enabled.
set(BUILD_STATIC_LIBRARIES ON)
set(BUILD_SHARED_LIBRARIES OFF)
set(BUILD_CONTRIBS_LIB ON)
# Point the Boost, zlib and Roaring roots at the prebuilt third-party install tree.
# NOTE(review): presumably consumed by find_package() calls inside clucene -- confirm there.
set(BOOST_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
set(ZLIB_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
set(Roaring_ROOT "$ENV{DORIS_THIRDPARTY}/installed")
set(USE_STAT64 0)
# USE_BTHREAD follows the top-level USE_BTHREAD_SCANNER switch.
if (USE_BTHREAD_SCANNER)
set(USE_BTHREAD ON)
else()
set(USE_BTHREAD OFF)
endif()
# EXCLUDE_FROM_ALL keeps the CLucene targets out of the default build unless
# another target depends on them.
add_subdirectory(${SRC_DIR}/clucene EXCLUDE_FROM_ALL)
# Apply one identical option set to every CLucene-related target. Only the
# spelling of the "ignore narrowing conversions" flag depends on the compiler.
if (COMPILER_CLANG)
    set(clucene_narrowing_flag -Wno-c++11-narrowing)
else ()
    set(clucene_narrowing_flag -Wno-narrowing)
endif()
foreach (clucene_target clucene-core-static clucene-shared-static clucene-contribs-lib ic)
    target_compile_options(${clucene_target} PRIVATE -fno-omit-frame-pointer ${clucene_narrowing_flag} -w -Wall)
endforeach()
# The helper variable is only needed for the loop above.
unset(clucene_narrowing_flag)
# Install the jieba dictionary directory from the CLucene sources into
# ${OUTPUT_DIR}. There is no trailing slash on the source path, so the "dict"
# directory itself is copied rather than only its contents.
install(DIRECTORY
${SRC_DIR}/clucene/src/contribs-lib/CLucene/analysis/jieba/dict
DESTINATION ${OUTPUT_DIR})
# Check if functions are supported on this platform. All flags will be generated
# in gensrc/build/common/env_config.h.
# You can check functions here which depend on the platform. Don't forget to add this
@ -645,6 +666,13 @@ include_directories(
${CMAKE_CURRENT_BINARY_DIR}/src/apache-orc/c++/include
)
# Header search paths for CLucene: the build-tree copy of src/shared plus the
# core, shared and contribs-lib source directories.
# NOTE(review): the build-tree entry presumably holds headers generated at
# configure time -- confirm against the clucene build.
include_directories(
${CMAKE_CURRENT_BINARY_DIR}/src/clucene/src/shared
${SRC_DIR}/clucene/src/core
${SRC_DIR}/clucene/src/shared
${SRC_DIR}/clucene/src/contribs-lib
)
include_directories(
${SRC_DIR}/
${TEST_DIR}/
@ -732,10 +760,6 @@ find_package(absl)
# When adding new dependencies, if you don't know whether they can run on all platforms,
# add them here first.
set(COMMON_THIRDPARTY
ic
clucene-core
clucene-shared
clucene-contribs-lib
backtrace
rocksdb
cyrus-sasl
@ -858,6 +882,10 @@ if (WITH_MYSQL)
endif()
# Append the libraries built from in-tree sources, in the same order as before,
# followed by ${WL_END_GROUP}.
list(APPEND DORIS_DEPENDENCIES
    orc
    ic
    clucene-core-static
    clucene-shared-static
    clucene-contribs-lib
    ${WL_END_GROUP}
)

1
be/src/clucene Submodule

Submodule be/src/clucene added at 76cd035119

View File

@ -38,10 +38,10 @@
#include <math.h>
#include <string.h>
#include <CLucene/util/croaring/roaring.hh>
#include <algorithm>
#include <filesystem>
#include <ostream>
#include <roaring/roaring.hh>
#include <set>
#include "common/config.h"
@ -687,7 +687,7 @@ void InvertedIndexVisitor::visit(std::vector<char>& doc_id, std::vector<uint8_t>
visit(roaring::Roaring::read(doc_id.data(), false));
}
void InvertedIndexVisitor::visit(Roaring* doc_id, std::vector<uint8_t>& packed_value) {
void InvertedIndexVisitor::visit(roaring::Roaring* doc_id, std::vector<uint8_t>& packed_value) {
if (!matches(packed_value.data())) {
return;
}

View File

@ -252,19 +252,26 @@ if [[ ! -f "${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a" ]]; then
fi
fi
echo "Update apache-orc ..."
set +e
cd "${DORIS_HOME}"
echo "Update apache-orc submodule ..."
git submodule update --init --recursive be/src/apache-orc
exit_code=$?
set -e
if [[ "${exit_code}" -ne 0 ]]; then
echo "Update apache-orc submodule failed, start to download and extract apache-orc package ..."
rm -rf "${DORIS_HOME}/be/src/apache-orc"
mkdir -p "${DORIS_HOME}/be/src/apache-orc"
curl -L https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz | tar -xz -C "${DORIS_HOME}/be/src/apache-orc" --strip-components=1
fi
# Fetch a git submodule; if git fails, fall back to downloading a source archive.
# Arguments:
#   $1 - submodule path, relative to ${DORIS_HOME}
#   $2 - submodule name, used only in log messages
#   $3 - URL of a .tar.gz archive extracted into the submodule path on fallback
update_submodule() {
    local submodule_path=$1
    local submodule_name=$2
    local archive_url=$3

    # Temporarily disable errexit so a failing "git submodule update" can be
    # handled here instead of aborting the whole script.
    set +e
    cd "${DORIS_HOME}"
    echo "Update ${submodule_name} submodule ..."
    git submodule update --init --recursive "${submodule_path}"
    exit_code=$?
    set -e

    if [[ "${exit_code}" -ne 0 ]]; then
        # Report the submodule that actually failed (this message used to say
        # "apache-orc" for every submodule).
        echo "Update ${submodule_name} submodule failed, start to download and extract ${submodule_name} package ..."
        mkdir -p "${DORIS_HOME}/${submodule_path}"
        # --strip-components=1 drops the archive's top-level directory so the
        # sources land directly in the submodule path.
        curl -L "${archive_url}" | tar -xz -C "${DORIS_HOME}/${submodule_path}" --strip-components=1
    fi
}
update_submodule "be/src/apache-orc" "apache-orc" "https://github.com/apache/doris-thirdparty/archive/refs/heads/orc.tar.gz"
update_submodule "be/src/clucene" "clucene" "https://github.com/apache/doris-thirdparty/archive/refs/heads/clucene.tar.gz"
if [[ "${CLEAN}" -eq 1 && "${BUILD_BE}" -eq 0 && "${BUILD_FE}" -eq 0 && "${BUILD_SPARK_DPP}" -eq 0 ]]; then
clean_gensrc
@ -573,6 +580,7 @@ if [[ "${OUTPUT_BE_BINARY}" -eq 1 ]]; then
cp -r -p "${DORIS_HOME}/be/output/bin"/* "${DORIS_OUTPUT}/be/bin"/
cp -r -p "${DORIS_HOME}/be/output/conf"/* "${DORIS_OUTPUT}/be/conf"/
cp -r -p "${DORIS_HOME}/be/output/dict" "${DORIS_OUTPUT}/be/"
if [[ -d "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" ]]; then
cp -r -p "${DORIS_THIRDPARTY}/installed/lib/hadoop_hdfs/" "${DORIS_OUTPUT}/be/lib/"
@ -618,7 +626,6 @@ EOF
copy_common_files "${DORIS_OUTPUT}/be/"
mkdir -p "${DORIS_OUTPUT}/be/log"
mkdir -p "${DORIS_OUTPUT}/be/storage"
cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
fi
if [[ "${BUILD_BROKER}" -eq 1 ]]; then

View File

@ -1571,51 +1571,6 @@ build_fast_float() {
cp -r ./include/fast_float "${TP_INSTALL_DIR}/include/"
}
# clucene
# Configure, build and install CLucene from ${TP_SOURCE_DIR}/${CLUCENE_SOURCE}
# into ${TP_INSTALL_DIR}, then copy the jieba dictionary to ${TP_INSTALL_DIR}/share.
# Shell variables read here:
#   USE_AVX2            - defaults to 1 on x86_64 and 0 on other machines
#   USE_BTHREAD_SCANNER - "ON" maps to -DUSE_BTHREAD=1, anything else to 0 (default OFF)
build_clucene() {
# Pick the USE_AVX2 default from the machine architecture unless already set.
if [[ "$(uname -m)" == 'x86_64' ]]; then
USE_AVX2="${USE_AVX2:-1}"
else
USE_AVX2="${USE_AVX2:-0}"
fi
if [[ -z "${USE_BTHREAD_SCANNER}" ]]; then
USE_BTHREAD_SCANNER='OFF'
fi
# Translate the ON/OFF switch into the 1/0 value passed to CMake below.
if [[ ${USE_BTHREAD_SCANNER} == "ON" ]]; then
USE_BTHREAD=1
else
USE_BTHREAD=0
fi
check_if_source_exist "${CLUCENE_SOURCE}"
cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"
# Remove any previous CMake cache so the configure step starts from scratch.
rm -rf CMakeCache.txt CMakeFiles/
# NOTE(review): ${warning_narrowing} is defined outside this function; presumably
# a compiler-specific flag that disables narrowing diagnostics -- confirm.
${CMAKE_CMD} -G "${GENERATOR}" \
-DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" \
-DBUILD_STATIC_LIBRARIES=ON \
-DBUILD_SHARED_LIBRARIES=OFF \
-DBOOST_ROOT="${TP_INSTALL_DIR}" \
-DZLIB_ROOT="${TP_INSTALL_DIR}" \
-DCMAKE_CXX_FLAGS="-g -fno-omit-frame-pointer ${warning_narrowing}" \
-DUSE_STAT64=0 \
-DUSE_AVX2="${USE_AVX2}" \
-DUSE_BTHREAD="${USE_BTHREAD}" \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_CONTRIBS_LIB=ON ..
${BUILD_SYSTEM} -j "${PARALLEL}"
${BUILD_SYSTEM} install
# Copy the jieba dictionary from the source tree into the install tree.
cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}"
if [[ ! -d "${TP_INSTALL_DIR}"/share ]]; then
mkdir -p "${TP_INSTALL_DIR}"/share
fi
cp -rf src/contribs-lib/CLucene/analysis/jieba/dict "${TP_INSTALL_DIR}"/share/
}
# hadoop_libs_x86
build_hadoop_libs_x86() {
check_if_source_exist "${HADOOP_LIBS_X86_SOURCE}"
@ -1685,7 +1640,6 @@ if [[ "${#packages[@]}" -eq 0 ]]; then
xxhash
concurrentqueue
fast_float
clucene
)
if [[ "$(uname -s)" == 'Darwin' ]]; then
read -r -a packages <<<"binutils gettext ${packages[*]}"

7
thirdparty/vars.sh vendored
View File

@ -54,12 +54,6 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
# of all thirdparties
#####################################################
#clucene
CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.12.tar.gz"
CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.12.tar.gz"
CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.12"
CLUCENE_MD5SUM="171035c1d4c9fe3d7307f04dd76ab3e3"
# libevent
LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz"
LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
@ -466,7 +460,6 @@ HADOOP_LIBS_X86_MD5SUM="96117450170487f007ffeca5ddf62f7e"
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES=(
'CLUCENE'
'LIBEVENT'
'OPENSSL'
'THRIFT'