Merge git://github.com/powturbo/TurboPFor

This commit is contained in:
x
2019-12-19 18:53:30 +01:00
3 changed files with 15 additions and 18 deletions

View File

@ -137,7 +137,11 @@ static ALWAYS_INLINE __m128i mm_xore_epi16( __m128i v, __m128i sv) { return _mm_
static ALWAYS_INLINE __m128i mm_delta_epi32(__m128i v, __m128i sv) { return _mm_sub_epi32(v, _mm_or_si128(_mm_srli_si128(sv, 12), _mm_slli_si128(v, 4))); } static ALWAYS_INLINE __m128i mm_delta_epi32(__m128i v, __m128i sv) { return _mm_sub_epi32(v, _mm_or_si128(_mm_srli_si128(sv, 12), _mm_slli_si128(v, 4))); }
static ALWAYS_INLINE __m128i mm_xore_epi32( __m128i v, __m128i sv) { return _mm_xor_si128(v, _mm_or_si128(_mm_srli_si128(sv, 12), _mm_slli_si128(v, 4))); } static ALWAYS_INLINE __m128i mm_xore_epi32( __m128i v, __m128i sv) { return _mm_xor_si128(v, _mm_or_si128(_mm_srli_si128(sv, 12), _mm_slli_si128(v, 4))); }
#endif #endif
#if !defined(_M_X64) && !defined(__x86_64__) && defined(__AVX__)
#define _mm256_extract_epi64(v, index) ((__int64)((uint64_t)(uint32_t)_mm256_extract_epi32((v), (index) * 2) | (((uint64_t)(uint32_t)_mm256_extract_epi32((v), (index) * 2 + 1)) << 32)))
#endif
//------------------ Horizontal OR ----------------------------------------------- //------------------ Horizontal OR -----------------------------------------------
#ifdef __AVX2__ #ifdef __AVX2__
static ALWAYS_INLINE unsigned mm256_hor_epi32(__m256i v) { static ALWAYS_INLINE unsigned mm256_hor_epi32(__m256i v) {

View File

@ -87,19 +87,15 @@
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile> <ClCompile>
<Optimization>MaxSpeed</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<MultiProcessorCompilation>true</MultiProcessorCompilation> <MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalOptions>/w24003 /w24005 /w24028 /w24047 /w24090 /w24133 /w24146 /w24333 /w24789 %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>/w24003 /w24005 /w24028 /w24047 /w24090 /w24133 /w24146 /w24333 /w24789 %(AdditionalOptions)</AdditionalOptions>
<AdditionalIncludeDirectories>..\..</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>..\..</AdditionalIncludeDirectories>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<ExceptionHandling>false</ExceptionHandling>
<FloatingPointModel>Fast</FloatingPointModel>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
@ -110,7 +106,7 @@
<ClCompile> <ClCompile>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@ -129,7 +125,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
@ -151,7 +147,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>__SSE__;__SSE2__;__SSE3__;__SSSE3__;__SSE4_1__;__SSE4_2__;USE_SSE;USE_AVX2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed> <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>

View File

@ -19,9 +19,7 @@
</ProjectConfiguration> </ProjectConfiguration>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\..\icapp.c"> <ClCompile Include="..\..\icapp.c" />
<PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<ClCompile Include="..\getopt.c" /> <ClCompile Include="..\getopt.c" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
@ -99,13 +97,12 @@
<ClCompile> <ClCompile>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions> <AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<AdditionalIncludeDirectories>..\..\ext</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>..\..\ext</AdditionalIncludeDirectories>
<ExceptionHandling>false</ExceptionHandling>
</ClCompile> </ClCompile>
<Link> <Link>
<SubSystem>Console</SubSystem> <SubSystem>Console</SubSystem>
@ -116,7 +113,7 @@
<ClCompile> <ClCompile>
<Optimization>Disabled</Optimization> <Optimization>Disabled</Optimization>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions> <AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
@ -134,7 +131,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions> <AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary> <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
@ -154,7 +151,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking> <FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions> <IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck> <SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> <PreprocessorDefinitions>CODEC2;_CRT_SECURE_NO_WARNINGS=;_CONSOLE;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode> <ConformanceMode>true</ConformanceMode>
<AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions> <AdditionalOptions>/w24146 /w24133 /w24996</AdditionalOptions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary> <RuntimeLibrary>MultiThreaded</RuntimeLibrary>