Support AVX2/FMA intrinsics in Audio Resampler module

Test results show that the AVX2/FMA implementation is 1.60x faster than the SSE implementation on atlas.

Bug: webrtc:11663
Test: common_audio_unittests on atlas and octopus.
Change-Id: Ibd45ea46aa97d5790a24e5116f741592b95f6416
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176382
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31810}
This commit is contained in:
Zhaoliang Ma
2020-07-22 17:34:56 +08:00
committed by Commit Bot
parent 6f148566dc
commit 1ca8d87239
9 changed files with 168 additions and 59 deletions

View File

@ -24,6 +24,20 @@ int GetCPUInfoNoASM(CPUFeature feature) {
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
// The 64-bit result is assembled from two 32-bit halves: the high half comes
// from edx and the low half from eax.
// NOTE(review): GetCPUInfo only reaches its xgetbv(0) call after its OSXSAVE
// bit test has succeeded; other callers presumably need the same guard -
// confirm before adding new call sites.
uint64_t xgetbv(uint32_t xcr) {
#if defined(_MSC_VER)
// MSVC build: delegate to _xgetbv instead of using inline assembly.
return _xgetbv(xcr);
#else
uint32_t eax, edx;
// |xcr| is handed to the instruction through the "c" input constraint; the
// two result halves are read back through the "a" and "d" output constraints.
__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
// Widen edx to 64 bits before shifting so the high half is not discarded.
return (static_cast<uint64_t>(edx) << 32) | eax;
#endif // _MSC_VER
}
#ifndef _MSC_VER
// Intrinsic for "cpuid".
#if defined(__pic__) && defined(__i386__)
@ -41,7 +55,7 @@ static inline void __cpuid(int cpu_info[4], int info_type) {
__asm__ volatile("cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
: "a"(info_type));
: "a"(info_type), "c"(0));
}
#endif
#endif // _MSC_VER
@ -51,6 +65,8 @@ static inline void __cpuid(int cpu_info[4], int info_type) {
// Actual feature detection for x86.
// Returns nonzero when |feature| is reported as available and 0 otherwise,
// including for any |feature| value that is not handled below.
// cpu_info[0..3] receive eax/ebx/ecx/edx in that order (see the output
// constraints of the inline-asm __cpuid wrapper above).
static int GetCPUInfo(CPUFeature feature) {
int cpu_info[4];
// Leaf 0: cpu_info[0] is saved as |num_ids| and later used to decide whether
// leaf 7 may be queried.
__cpuid(cpu_info, 0);
int num_ids = cpu_info[0];
// Leaf 1: supplies the feature bits tested in the branches below.
__cpuid(cpu_info, 1);
if (feature == kSSE2) {
// SSE2 is tested via a bit in cpu_info[3] (edx).
return 0 != (cpu_info[3] & 0x04000000);
@ -58,6 +74,23 @@ static int GetCPUInfo(CPUFeature feature) {
if (feature == kSSE3) {
// SSE3 is tested via the lowest bit of cpu_info[2] (ecx).
return 0 != (cpu_info[2] & 0x00000001);
}
if (feature == kAVX2) {
// Interpret CPU feature information.
// Only element 0 is set to -1 by this initializer; the remaining elements
// are zero-initialized. If leaf 7 is not queried, cpu_info7[1] therefore
// stays 0 and the final bit test below evaluates to false.
int cpu_info7[4] = {-1};
if (num_ids >= 7) {
__cpuid(cpu_info7, 7);
}
#if defined(WEBRTC_ENABLE_AVX2)
// All conditions must hold. Because && short-circuits, xgetbv(0) is only
// executed once the OSXSAVE bit test has passed.
// NOTE(review): the AVX and AVX2 bit labels below follow Intel's CPUID
// documentation rather than anything stated in this file - confirm.
return (cpu_info[2] & 0x10000000) != 0 /* AVX */ &&
(cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
(cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
(xgetbv(0) & 0x00000006) == 6 /* XSAVE enabled by kernel */ &&
(cpu_info7[1] & 0x00000020) != 0 /* AVX2 (leaf 7, ebx) */;
#else
// AVX2 support was not compiled in: always report it as unavailable.
return 0;
#endif // WEBRTC_ENABLE_AVX2
}
// Any other feature value is reported as unsupported.
return 0;
}
#else