diff --git a/lib/include_/sse_neon.h b/lib/include_/sse_neon.h index f79668c..e1f5e00 100644 --- a/lib/include_/sse_neon.h +++ b/lib/include_/sse_neon.h @@ -480,7 +480,7 @@ static ALWAYS_INLINE __m128i mm_testnz_epu8(_u_){ #define mm_cvtsi64_si128p(_u64p_, _u_) mm_loadu_epi64p(_u64p_,_v_) #define _mm_cvtsi64_si128(_u_) (__m128i)__lsx_vreplgr2vr_d(_u_) //---------------------------------------------- Reverse bits/bytes --------------------------------------------------------------- -static ALWAYS_INLINE __m128i mm_rbit_epi8(_v_) { +static ALWAYS_INLINE __m128i mm_rbit_epi8(__m128i _v_) { uint64_t low_src = __lsx_vpickve2gr_du(_v_, 0); uint64_t low = 0; asm volatile( @@ -549,8 +549,8 @@ static ALWAYS_INLINE uint16_t _mm_movemask_epi8(__m128i v) { __m128i sum32 = __lsx_vhaddw_wu_hu(sum16, sum16); __m128i sum64 = __lsx_vhaddw_du_wu(sum32, sum32); - // 步骤5:提取低16位结果 - return __lsx_vpickve2gr_hu(sum64, 0) | __lsx_vpickve2gr_hu(sum64, 8)<< 8; + // 步骤5:提取结果 + return (uint16_t)__lsx_vpickve2gr_bu(sum64, 0) | (((uint16_t)__lsx_vpickve2gr_bu(sum64, 8))<< 8); }