[thirdparty](clucene) add clucene deps for doris inverted index (#15807)
As part of the Inverted Index DSIP steps, we'd like to contribute our inverted index implementation step by step. First of all, we need to introduce CLucene into the Doris third-party libraries, because the inverted index implementation is based on the Lucene API and index file format. We have also added our own features and performance improvements built on CLucene, so we need to maintain the repository ourselves.
This commit is contained in:
1
build.sh
1
build.sh
@ -575,6 +575,7 @@ EOF
|
||||
copy_common_files "${DORIS_OUTPUT}/be/"
|
||||
mkdir -p "${DORIS_OUTPUT}/be/log"
|
||||
mkdir -p "${DORIS_OUTPUT}/be/storage"
|
||||
cp -r -p "${DORIS_THIRDPARTY}/installed/share/dict" "${DORIS_OUTPUT}/be/"
|
||||
fi
|
||||
|
||||
if [[ "${BUILD_BROKER}" -eq 1 ]]; then
|
||||
|
||||
29
thirdparty/build-thirdparty.sh
vendored
29
thirdparty/build-thirdparty.sh
vendored
@ -146,6 +146,7 @@ if [[ "${CC}" == *gcc ]]; then
|
||||
warning_stringop_truncation='-Wno-stringop-truncation'
|
||||
warning_class_memaccess='-Wno-class-memaccess'
|
||||
warning_array_parameter='-Wno-array-parameter'
|
||||
warning_narrowing='-Wno-narrowing'
|
||||
boost_toolset='gcc'
|
||||
elif [[ "${CC}" == *clang ]]; then
|
||||
warning_uninitialized='-Wno-uninitialized'
|
||||
@ -156,6 +157,7 @@ elif [[ "${CC}" == *clang ]]; then
|
||||
warning_reserved_identifier='-Wno-reserved-identifier'
|
||||
warning_suggest_override='-Wno-suggest-override -Wno-suggest-destructor-override'
|
||||
warning_option_ignored='-Wno-option-ignored'
|
||||
warning_narrowing='-Wno-c++11-narrowing'
|
||||
boost_toolset='clang'
|
||||
libhdfs_cxx17='-std=c++1z'
|
||||
|
||||
@ -1545,12 +1547,39 @@ build_concurrentqueue() {
|
||||
cp ./*.h "${TP_INSTALL_DIR}/include/"
|
||||
}
|
||||
|
||||
#clucene
|
||||
# Configure, build and install CLucene (static libraries plus the contribs
# library) into TP_INSTALL_DIR, then copy the jieba dictionary directory to
# "${TP_INSTALL_DIR}/share/dict".
#
# Globals read:    CLUCENE_SOURCE, TP_SOURCE_DIR, TP_INSTALL_DIR, BUILD_DIR,
#                  CMAKE_CMD, GENERATOR, BUILD_SYSTEM, PARALLEL,
#                  warning_narrowing
# Globals written: USE_AVX2 (defaults to 1 when empty/unset),
#                  BUILD_TYPE (defaults to Release when empty/unset)
# Returns:         non-zero as soon as any directory change, configure,
#                  build, install or copy step fails.
build_clucene() {
    # The defaults are assigned to the global variables (not locals) so the
    # values remain visible to the caller after this function returns.
    if [[ -z "${USE_AVX2}" ]]; then
        USE_AVX2=1
    fi
    if [[ -z "${BUILD_TYPE}" ]]; then
        BUILD_TYPE=Release
    fi

    check_if_source_exist "${CLUCENE_SOURCE}"

    # Every step is checked explicitly. In an `a && b` list a failure of `a`
    # does not trigger errexit, so a failed mkdir would otherwise let the
    # cleanup and the cmake invocation below run in the wrong directory.
    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}" || return 1
    mkdir -p "${BUILD_DIR}" || return 1
    cd "${BUILD_DIR}" || return 1

    # Drop any cached configuration so cmake starts from a clean state.
    rm -rf CMakeCache.txt CMakeFiles/

    # CMAKE_CMD and BUILD_SYSTEM are intentionally left unquoted so that a
    # value carrying extra arguments is still split into separate words.
    ${CMAKE_CMD} -G "${GENERATOR}" -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" -DBUILD_STATIC_LIBRARIES=ON \
        -DBUILD_SHARED_LIBRARIES=OFF -DCMAKE_CXX_FLAGS="-fno-omit-frame-pointer ${warning_narrowing}" \
        -DUSE_STAT64=0 -DUSE_AVX2="${USE_AVX2}" -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" -DBUILD_CONTRIBS_LIB=ON .. || return 1
    ${BUILD_SYSTEM} -j "${PARALLEL}" || return 1
    ${BUILD_SYSTEM} install || return 1

    # The dictionary path below is relative to the CLucene source root.
    cd "${TP_SOURCE_DIR}/${CLUCENE_SOURCE}" || return 1
    # mkdir -p is a no-op when the directory already exists.
    mkdir -p "${TP_INSTALL_DIR}/share" || return 1
    cp -rf src/contribs-lib/CLucene/analysis/jieba/dict "${TP_INSTALL_DIR}/share/"
}
|
||||
|
||||
if [[ "$(uname -s)" == 'Darwin' ]]; then
|
||||
echo 'build for Darwin'
|
||||
build_binutils
|
||||
build_gettext
|
||||
fi
|
||||
|
||||
build_clucene
|
||||
build_libunixodbc
|
||||
build_openssl
|
||||
build_libevent
|
||||
|
||||
7
thirdparty/vars.sh
vendored
7
thirdparty/vars.sh
vendored
@ -54,6 +54,12 @@ export TP_JAR_DIR="${TP_INSTALL_DIR}/lib/jar"
|
||||
# of all thirdparties
|
||||
#####################################################
|
||||
|
||||
#clucene
|
||||
CLUCENE_DOWNLOAD="https://github.com/apache/doris-thirdparty/archive/refs/tags/libclucene-v2.4.4.zip"
|
||||
CLUCENE_NAME="doris-thirdparty-libclucene-v2.4.4.zip"
|
||||
CLUCENE_SOURCE="doris-thirdparty-libclucene-v2.4.4"
|
||||
CLUCENE_MD5SUM="431b4c2dc5c75df5e114da72a2e9b74a"
|
||||
|
||||
# libevent
|
||||
LIBEVENT_DOWNLOAD="https://github.com/libevent/libevent/archive/release-2.1.12-stable.tar.gz"
|
||||
LIBEVENT_NAME=libevent-release-2.1.12-stable.tar.gz
|
||||
@ -443,6 +449,7 @@ CONCURRENTQUEUE_MD5SUM="118e5bb661b567634647312991e10222"
|
||||
|
||||
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
|
||||
export TP_ARCHIVES=(
|
||||
'CLUCENE'
|
||||
'LIBEVENT'
|
||||
'OPENSSL'
|
||||
'THRIFT'
|
||||
|
||||
Reference in New Issue
Block a user