mirror of
https://git.postgresql.org/git/postgresql.git
synced 2026-02-15 10:57:02 +08:00
Optimize popcount functions with ARM Neon intrinsics.
This commit introduces Neon implementations of pg_popcount{32,64},
pg_popcount(), and pg_popcount_masked(). As in simd.h, we assume
that all available AArch64 hardware supports Neon, so we don't need
any new configure-time or runtime checks. Some compilers already
emit Neon instructions for these functions, but our hand-rolled
implementations for pg_popcount() and pg_popcount_masked()
performed better in testing, likely due to better instruction-level
parallelism.
Author: "Chiranmoy.Bhattacharya@fujitsu.com" <Chiranmoy.Bhattacharya@fujitsu.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com
This commit is contained in:
@@ -298,6 +298,15 @@ pg_ceil_log2_64(uint64 num)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* On AArch64, we can use Neon instructions if the compiler provides access to
|
||||
* them (as indicated by __ARM_NEON). As in simd.h, we assume that all
|
||||
* available 64-bit hardware has Neon support.
|
||||
*/
|
||||
#if defined(__aarch64__) && defined(__ARM_NEON)
|
||||
#define POPCNT_AARCH64 1
|
||||
#endif
|
||||
|
||||
#ifdef TRY_POPCNT_X86_64
|
||||
/* Attempt to use the POPCNT instruction, but perform a runtime check first */
|
||||
extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
|
||||
|
||||
Reference in New Issue
Block a user