From 74051758db13d160a4244413b277ab1e01b199f7 Mon Sep 17 00:00:00 2001 From: yangwenqing <1552539019@qq.com> Date: Tue, 14 Jan 2025 14:27:47 +0800 Subject: [PATCH] add lsx add -mlsx to compile add loongarch_sx flag fix some bugs fix some bugs fix some bugs --- cmake/Env.cmake | 3 ++ deps/oblib/src/CMakeLists.txt | 3 +- deps/oblib/src/lib/codec/ob_fast_delta.cpp | 1 - deps/oblib/src/lib/codec/ob_sse_to_neon.h | 59 +++++++++++++++++++++- src/objit/CMakeLists.txt | 2 +- src/sql/parser/sql_parser_mysql_mode.y | 8 +-- 6 files changed, 68 insertions(+), 8 deletions(-) diff --git a/cmake/Env.cmake b/cmake/Env.cmake index 095222d4e..848c2f100 100644 --- a/cmake/Env.cmake +++ b/cmake/Env.cmake @@ -370,6 +370,9 @@ elseif( ${ARCHITECTURE} STREQUAL "loongarch64" ) set(MARCHE_CFLAGS "-march=loongarch64") set(MTUNE_CFLAGS "-mabi=lp64d") set(ARCH_LDFLAGS "-latomic") + set(ARCH_C_FLAGS "-mlsx") + set(ARCH_CXX_FLAGS "-mlsx") + set(CMAKE_REQUIRED_FLAGS "${ARCH_C_FLAGS}") else() if (${OB_DISABLE_LSE}) message(STATUS "build with no-lse") diff --git a/deps/oblib/src/CMakeLists.txt b/deps/oblib/src/CMakeLists.txt index b74ea191d..7c8608d2a 100644 --- a/deps/oblib/src/CMakeLists.txt +++ b/deps/oblib/src/CMakeLists.txt @@ -119,7 +119,7 @@ if (OB_USE_CLANG) set(OBLIB_COMPILE_DEFINITIONS -fno-strict-aliasing -fno-omit-frame-pointer ${MARCH_CFLAGS} ${MTUNE_CFLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D_NO_EXCEPTION -DTSI_STATIC_SUM -DOCI_LINK_RUNTIME - -Wall -Wextra -Wformat -Werror -mlsx -Wno-deprecated -Wno-address-of-packed-member -Wno-sign-compare -Wno-tautological-compare # -Wno-psabi -Wno-c99-designator -Wno-int-in-bool-context -Wno-sizeof-array-div # -Wno-implicit-const-int-float-conversion -Wno-fortify-source -Wno-non-c-typedef-for-linkage @@ -174,6 +174,7 @@ else() -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D_NO_EXCEPTION -DOCI_LINK_RUNTIME -Wall -Wextra -Wformat -Werror #loongarch? 
+ -mlsx -Wno-deprecated -Wno-sign-compare -Wno-psabi diff --git a/deps/oblib/src/lib/codec/ob_fast_delta.cpp b/deps/oblib/src/lib/codec/ob_fast_delta.cpp index 66610383d..3a6bf5320 100644 --- a/deps/oblib/src/lib/codec/ob_fast_delta.cpp +++ b/deps/oblib/src/lib/codec/ob_fast_delta.cpp @@ -5,7 +5,6 @@ #include "ob_fast_delta.h" #include "ob_sse_to_neon.h" - namespace oceanbase { namespace common diff --git a/deps/oblib/src/lib/codec/ob_sse_to_neon.h b/deps/oblib/src/lib/codec/ob_sse_to_neon.h index e57e0eaa1..655bddb5d 100644 --- a/deps/oblib/src/lib/codec/ob_sse_to_neon.h +++ b/deps/oblib/src/lib/codec/ob_sse_to_neon.h @@ -20,10 +20,67 @@ #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) /* GCC-compatible compiler, targeting x86/x86-64 */ #include + #elif defined(__GNUC__) && defined(__loongarch_lp64) #include -//TODO: + +// SSE instruction to LoongArch LSX instruction mapping +#define __m128i v4u32 + +// arithmetic #define _mm_sub_epi32(a, b) __lsx_vsub_w((a), (b)) +#define _mm_add_epi32(a, b) __lsx_vadd_w((a), (b)) + +//store +#define _mm_storeu_si128(p, a) __lsx_vst(a, (uint32_t *)(p), 0) + +//load +#define _mm_loadu_si128(p) __lsx_vld((const uint32_t *)(p),0) +#define _mm_load_si128(p) __lsx_vld((const uint32_t *)(p),0) +#define _mm_lddqu_si128(p) __lsx_vld((const uint32_t *)(p),0) + +// others +// Emulates the Supplemental Streaming SIMD Extensions 3 (SSSE3) instruction palignr to extract a 128-bit byte aligned value. Declared inline as well as always_inline: GCC's -Wattributes warns about an always_inline function that is not declared inline. +static inline __attribute__((always_inline)) __m128i case_alignr_vectors(v16u8 a, v16u8 b, int ralign) { + uint8_t index_shuf[16]; + for(int i = 0; i < 16; i++) { + index_shuf[i] = (uint8_t)ralign; + ralign += 1; + } + v16u8 index = __lsx_vld((uint8_t *)index_shuf, 0); + return __lsx_vshuf_b(b, a, index); +} +#define _mm_alignr_epi8(a, b, ralign) case_alignr_vectors(b, a, ralign) + +// insert or extract +// emits the Streaming SIMD Extensions 4 (SSE4) instruction pextrd. This instruction extracts a 32-bit value from a 128 bit parameter. 
+ +#define _mm_extract_epi32(a, ndx) __lsx_vpickve2gr_wu(a, ndx) + +// set +#define _mm_set1_epi32(u) __lsx_vreplgr2vr_w(u) +#define _mm_set1_epi16(w) (__m128i)__lsx_vreplgr2vr_h(w) + +// shift (the count is broadcast at the same lane width as the shift: _h for 16-bit lanes, _w for 32-bit lanes) +#define _mm_slli_si128(a, imm) (__m128i)((imm)<1?(a):((imm)>15?__lsx_vreplgr2vr_b(0):case_alignr_vectors(__lsx_vreplgr2vr_b(0), (v16u8)(a), 16-(imm)))) // vextq_u8: __constrange(0-15) +#define _mm_slli_epi16(a, count) (__m128i)__lsx_vsll_h((v8u16)(a), __lsx_vreplgr2vr_h((count))) +#define _mm_srli_epi16(a, count) (__m128i)__lsx_vsrl_h((v8u16)(a), __lsx_vreplgr2vr_h((count))) +#define _mm_slli_epi32(a, count) (__m128i)__lsx_vsll_w((v4u32)(a), __lsx_vreplgr2vr_w((count))) +#define _mm_srli_epi32(a, count) (__m128i)__lsx_vsrl_w((v4u32)(a), __lsx_vreplgr2vr_w((count))) + + // logical + #define _mm_or_si128(a, b) (__m128i)__lsx_vor_v((a), (b)) + #define _mm_and_si128(a, b) (__m128i)__lsx_vand_v((a), (b)) + + + // Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm. + #define _mm_shuffle_epi32(a, imm) ({ const __m128i _av =a;\ + __m128i _v = __lsx_vinsgr2vr_w(__lsx_vreplgr2vr_b(0), __lsx_vpickve2gr_wu(_av, (imm) & 0x3), 0);\ + _v = __lsx_vinsgr2vr_w(_v, __lsx_vpickve2gr_wu(_av, ((imm) >> 2) & 0x3), 1);\ + _v = __lsx_vinsgr2vr_w(_v, __lsx_vpickve2gr_wu(_av, ((imm) >> 4) & 0x3), 2);\ + _v = __lsx_vinsgr2vr_w(_v, __lsx_vpickve2gr_wu(_av, ((imm) >> 6) & 0x3), 3); _v;\ + }) + #elif defined(__GNUC__) && defined(__ARM_NEON) /* GCC-compatible compiler, targeting ARM with NEON */ diff --git a/src/objit/CMakeLists.txt b/src/objit/CMakeLists.txt index 5b09b2799..8d05e0082 100644 --- a/src/objit/CMakeLists.txt +++ b/src/objit/CMakeLists.txt @@ -9,7 +9,7 @@ project(objit message(FATAL_ERROR "DEP_3RD_DIR not set") endif() -set(LLVM_DIR "${DEP_3RD_DIR}/usr/local/oceanbase/devtools/lib/cmake/llvm") +set(LLVM_DIR "/usr/lib/llvm-18/lib/cmake/llvm") find_package(LLVM REQUIRED CONFIG) include(cmake/libutils.cmake) diff --git a/src/sql/parser/sql_parser_mysql_mode.y 
b/src/sql/parser/sql_parser_mysql_mode.y index 2d45ccc8f..9c48c319a 100644 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -56,7 +56,7 @@ extern int easy_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) %} %destructor {destroy_tree($$);} -%destructor {oceanbase::common::ob_free($$);} +//%destructor {oceanbase::common::ob_free($$);} %token NAME_OB %token STRING_VALUE @@ -124,7 +124,7 @@ extern int easy_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) %left AND AND_OP %left BETWEEN CASE WHEN THEN ELSE %nonassoc LOWER_THAN_COMP -%left COMP_EQ COM P_NSEQ COMP_GE COMP_GT COMP_LE COMP_LT COMP_NE IS LIKE IN REGEXP SOUNDS +%left COMP_EQ COMP_NSEQ COMP_GE COMP_GT COMP_LE COMP_LT COMP_NE IS LIKE IN REGEXP SOUNDS %nonassoc STRING_VALUE %right ESCAPE /*for conflict for escape*/ %left '|' @@ -11271,11 +11271,11 @@ NAME_OB { $$ = $1; } -| name_list NAME_OB %prec COMMA +| name_list NAME_OB %prec LOWER_COMMA { malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $2); } -| name_list ',' NAME_OB %prec COMMA +| name_list ',' NAME_OB %prec LOWER_COMMA { malloc_non_terminal_node($$, result->malloc_pool_, T_LINK_NODE, 2, $1, $3); }