From 11f265c0a426a91d4bb6e20039483bd7e8cd444c Mon Sep 17 00:00:00 2001
From: Pavel P
Date: Fri, 6 Sep 2019 07:59:50 -0700
Subject: [PATCH] Fix x86 build

- added _mm_cvtsi64_si128/_bzhi_u64 for non x64 builds
- added _BitScanForward64/_BitScanReverse64 for non x64/arm64 builds
- added x86 build target to vs2017 project files
- compilation fixes
---
 bitunpack.c                 |  6 ++++
 conf.h                      | 23 ++++++++++--
 fp.c                        |  3 ++
 trlec.c                     |  1 -
 vs/vs2017/TurboPFor.sln     | 10 ++++++
 vs/vs2017/TurboPFor.vcxproj | 71 +++++++++++++++++++++++++++++++++++++
 vs/vs2017/icapp.vcxproj     | 70 ++++++++++++++++++++++++++++++++++++
 7 files changed, 180 insertions(+), 4 deletions(-)

diff --git a/bitunpack.c b/bitunpack.c
index c90e995..23f6a1b 100644
--- a/bitunpack.c
+++ b/bitunpack.c
@@ -764,6 +764,12 @@ size_t bitnfunpack128v32( unsigned char *__restrict in, size_t n, uint32_t *__re
 #define mm256_maskz_expand_epi32(_m_,_v_) _mm256_maskz_expand_epi32(_m_,_v_)
 #define mm256_maskz_loadu_epi32( _m_,_v_) _mm256_maskz_loadu_epi32( _m_,_v_)
 #else
+#if !(defined(_M_X64) || defined(__amd64__)) && (defined(__i386__) || defined(_M_IX86))
+/* Fallback for 32-bit x86 builds: place the 64-bit value in the low half of the vector via a 64-bit load. */
+static inline __m128i _mm_cvtsi64_si128(long long a) {
+  return _mm_loadl_epi64((const __m128i*)&a);
+}
+#endif
 static ALIGNED(unsigned char, permv[256][8], 32) = {
 0,0,0,0,0,0,0,0,
 0,1,1,1,1,1,1,1,
diff --git a/conf.h b/conf.h
index a8ae0b6..86ce565 100644
--- a/conf.h
+++ b/conf.h
@@ -96,12 +96,29 @@ static inline int __bsr32(unsigned x) { unsigned long z=0; _BitScanReverse(&z, x
 static inline int bsr32( unsigned x) { unsigned long z; _BitScanReverse(&z, x); return x?z+1:0; }
 static inline int ctz32( unsigned x) { unsigned long z; _BitScanForward(&z, x); return x?z:32; }
 static inline int clz32( unsigned x) { unsigned long z; _BitScanReverse(&z, x); return x?31-z:32; }
-  #ifdef _WIN64
-#pragma intrinsic(_BitScanReverse)
+  #if !defined(_M_ARM64) && !defined(_M_X64)
+/* Builds other than x64/ARM64: emulate the 64-bit bit-scan intrinsics with the 32-bit ones. */
+/* Stores the index of the lowest set bit of x in *ret; stores 0 and returns 0 when x == 0. */
+static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
+  unsigned long lo = (unsigned long)x, hi = (unsigned long)(x >> 32), idx = 0;
+  if (lo) _BitScanForward(&idx, lo);
+  else if (hi) { _BitScanForward(&idx, hi); idx += 32; }
+  *ret = idx;
+  return x != 0;
+}
+/* Stores the index of the highest set bit of x in *ret; stores 0 and returns 0 when x == 0. */
+static inline unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
+  unsigned long lo = (unsigned long)x, hi = (unsigned long)(x >> 32), idx = 0;
+  if (hi) { _BitScanReverse(&idx, hi); idx += 32; }
+  else if (lo) _BitScanReverse(&idx, lo);
+  *ret = idx;
+  return x != 0;
+}
+  #endif
 static inline int bsr64(uint64_t x) { unsigned long z=0; _BitScanReverse64(&z, x); return x?z+1:0; }
 static inline int ctz64(uint64_t x) { unsigned long z; _BitScanForward64(&z, x); return x?z:64; }
 static inline int clz64(uint64_t x) { unsigned long z; _BitScanReverse64(&z, x); return x?63-z:64; }
-  #endif
+
 #define rol32(x,s) _lrotl(x, s)
 #define ror32(x,s) _lrotr(x, s)
 
diff --git a/fp.c b/fp.c
index a348d0b..9f59e0a 100644
--- a/fp.c
+++ b/fp.c
@@ -44,6 +44,9 @@
 #include
 #else
 #include
+#endif
+#if !(defined(_M_X64) || defined(__amd64__)) && (defined(__i386__) || defined(_M_IX86))
+#define _bzhi_u64(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1))
 #endif
 #else
 #define _bzhi_u64(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1))
diff --git a/trlec.c b/trlec.c
index ad3619f..3fb426e 100644
--- a/trlec.c
+++ b/trlec.c
@@ -216,7 +216,6 @@ unsigned trlec(const unsigned char *__restrict in, unsigned inlen, unsigned char
 continue;
 a: ip += ctz64(z)>>3;
 #else
- uint32_t z;
 uint32_t z; SZ32; SZ32; SZ32; SZ32; __builtin_prefetch(ip +256, 0);
 continue;
 a: ip += ctz32(z)>>3;
diff --git a/vs/vs2017/TurboPFor.sln b/vs/vs2017/TurboPFor.sln
index 220f7f1..43f7880 100644
--- a/vs/vs2017/TurboPFor.sln
+++ b/vs/vs2017/TurboPFor.sln
@@ -10,17 +10,27 @@ EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
 		Release|x64 = Release|x64
+		Release|x86 = Release|x86
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x64.ActiveCfg = Debug|x64
{A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x64.Build.0 = Debug|x64 + {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x86.ActiveCfg = Debug|Win32 + {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Debug|x86.Build.0 = Debug|Win32 {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x64.ActiveCfg = Release|x64 {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x64.Build.0 = Release|x64 + {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x86.ActiveCfg = Release|Win32 + {A162F37F-183F-4250-88AB-9B9FBDE30B04}.Release|x86.Build.0 = Release|Win32 {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x64.ActiveCfg = Debug|x64 {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x64.Build.0 = Debug|x64 + {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x86.ActiveCfg = Debug|Win32 + {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Debug|x86.Build.0 = Debug|Win32 {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x64.ActiveCfg = Release|x64 {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x64.Build.0 = Release|x64 + {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x86.ActiveCfg = Release|Win32 + {6876BEB8-2B45-48B9-8381-1D4094FE8868}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/vs/vs2017/TurboPFor.vcxproj b/vs/vs2017/TurboPFor.vcxproj index fe1e42f..e4fbb38 100644 --- a/vs/vs2017/TurboPFor.vcxproj +++ b/vs/vs2017/TurboPFor.vcxproj @@ -1,10 +1,18 @@ + + Debug + Win32 + Debug x64 + + Release + Win32 + Release x64 @@ -24,11 +32,21 @@ true v141 + + StaticLibrary + true + v141 + StaticLibrary false v141 + + StaticLibrary + false + v141 + @@ -37,20 +55,36 @@ + + + + + + true $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + + true + $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + false $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ 
$(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + + false + $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + Disabled @@ -67,6 +101,22 @@ true + + + Disabled + true + __SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + MultiThreadedDebug + true + /w24003 /w24005 /w24028 /w24047 /w24090 /w24133 /w24146 /w24333 /w24789 %(AdditionalOptions) + + + Console + true + + MaxSpeed @@ -88,6 +138,27 @@ true + + + MaxSpeed + true + true + true + __SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + AdvancedVectorExtensions2 + Speed + MultiThreaded + true + /w24003 /w24005 /w24028 /w24047 /w24090 /w24133 /w24146 /w24333 /w24789 %(AdditionalOptions) + + + Console + true + true + true + + diff --git a/vs/vs2017/icapp.vcxproj b/vs/vs2017/icapp.vcxproj index b1557d2..bf75ef6 100644 --- a/vs/vs2017/icapp.vcxproj +++ b/vs/vs2017/icapp.vcxproj @@ -1,10 +1,18 @@ + + Debug + Win32 + Debug x64 + + Release + Win32 + Release x64 @@ -32,11 +40,21 @@ true v141 + + Application + true + v141 + Application false v141 + + Application + false + v141 + @@ -45,20 +63,36 @@ + + + + + + true $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + + true + $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + false $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + + false + $(SolutionDir)msvc.build\$(Platform)-$(Configuration)\ + $(SolutionDir)msvc.build\.obj\$(Platform)-$(Configuration)-$(ProjectName)\ + Disabled @@ -75,6 +109,22 @@ true + + + Disabled + 
true + CODEC2;_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;%(PreprocessorDefinitions) + true + /w24146 /w24133 /w24996 + MultiThreadedDebug + AdvancedVectorExtensions2 + ..\..\ext + + + Console + true + + MaxSpeed @@ -95,6 +145,26 @@ true + + + MaxSpeed + true + true + true + CODEC2;_CRT_SECURE_NO_WARNINGS;_CONSOLE;NDEBUG;%(PreprocessorDefinitions) + true + /w24146 /w24133 /w24996 + MultiThreaded + AdvancedVectorExtensions2 + ..\..\ext + + + Console + true + true + true + +