APM: add HW-only denormal disabler

Denormal numbers (see [1]) may originate in APM when the input is zeroed
after a non-zero signal. In extreme cases, instructions involving
denormal operands may run as much as 100 times slower, which seems to
be the case (to some extent) for crbug.com/1227566.

This CL adds a class that disables denormals only via hardware on x86
and on ARM. The class is used in APM and it is an adaptation of [2].

Tested: appr.tc call on Chromium (Win, Mac)

[1] https://en.wikipedia.org/wiki/Denormal_number
[2] https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/platform/audio/denormal_disabler.h

Fixed: chromium:1227566
Change-Id: I0ed2eab55dc597529f09f93c26c7a01de051fdbe
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/227768
Reviewed-by: Magnus Flodman <mflodman@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#34701}
This commit is contained in:
Alessio Bazzica
2021-08-10 15:23:23 +02:00
committed by WebRTC LUCI CQ
parent db68979a20
commit 0441bb625f
7 changed files with 342 additions and 0 deletions

View File

@ -184,6 +184,7 @@ rtc_library("audio_processing") {
"../../rtc_base/synchronization:mutex",
"../../rtc_base/system:rtc_export",
"../../system_wrappers",
"../../system_wrappers:denormal_disabler",
"../../system_wrappers:field_trial",
"../../system_wrappers:metrics",
"aec3",
@ -373,6 +374,7 @@ if (rtc_include_tests) {
"../../rtc_base/system:arch",
"../../rtc_base/system:file_wrapper",
"../../system_wrappers",
"../../system_wrappers:denormal_disabler",
"../../test:fileutils",
"../../test:rtc_expect_death",
"../../test:test_support",

View File

@ -35,6 +35,7 @@
#include "rtc_base/ref_counted_object.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/denormal_disabler.h"
#include "system_wrappers/include/field_trial.h"
#include "system_wrappers/include/metrics.h"
@ -254,6 +255,8 @@ AudioProcessingImpl::AudioProcessingImpl(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
use_setup_specific_default_aec3_config_(
UseSetupSpecificDefaultAec3Congfig()),
use_denormal_disabler_(
!field_trial::IsEnabled("WebRTC-ApmDenormalDisablerKillSwitch")),
capture_runtime_settings_(RuntimeSettingQueueSize()),
render_runtime_settings_(RuntimeSettingQueueSize()),
capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
@ -284,6 +287,9 @@ AudioProcessingImpl::AudioProcessingImpl(
<< !!submodules_.capture_post_processor
<< "\nRender pre processor: "
<< !!submodules_.render_pre_processor;
RTC_LOG(LS_INFO) << "Denormal disabler: "
<< (DenormalDisabler::IsSupported() ? "supported"
: "unsupported");
// Mark Echo Controller enabled if a factory is injected.
capture_nonlocked_.echo_controller_enabled =
@ -791,6 +797,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
MutexLock lock_capture(&mutex_capture_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (aec_dump_) {
RecordUnprocessedCaptureStream(src);
@ -1080,6 +1087,7 @@ int AudioProcessingImpl::ProcessStream(const int16_t* const src,
RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
MutexLock lock_capture(&mutex_capture_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (aec_dump_) {
RecordUnprocessedCaptureStream(src, input_config);
@ -1109,6 +1117,7 @@ int AudioProcessingImpl::ProcessStream(const int16_t* const src,
int AudioProcessingImpl::ProcessCaptureStreamLocked() {
EmptyQueuedRenderAudioLocked();
HandleCaptureRuntimeSettings();
DenormalDisabler denormal_disabler(use_denormal_disabler_);
// Ensure that not both the AEC and AECM are active at the same time.
// TODO(peah): Simplify once the public API Enable functions for these
@ -1436,6 +1445,8 @@ int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
float* const* dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
MutexLock lock(&mutex_render_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
if (submodule_states_.RenderMultiBandProcessingActive() ||
submodule_states_.RenderFullBandProcessingActive()) {
@ -1473,6 +1484,8 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked(
RTC_DCHECK_EQ(input_config.num_frames(),
formats_.api_format.reverse_input_stream().num_frames());
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (aec_dump_) {
const size_t channel_size =
formats_.api_format.reverse_input_stream().num_frames();
@ -1497,6 +1510,8 @@ int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
}
MutexLock lock(&mutex_render_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
ProcessingConfig processing_config = formats_.api_format;
processing_config.reverse_input_stream().set_sample_rate_hz(
input_config.sample_rate_hz());
@ -1531,6 +1546,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity.
HandleRenderRuntimeSettings();
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (submodules_.render_pre_processor) {
submodules_.render_pre_processor->Process(render_buffer);

View File

@ -187,6 +187,8 @@ class AudioProcessingImpl : public AudioProcessing {
static int instance_count_;
const bool use_setup_specific_default_aec3_config_;
const bool use_denormal_disabler_;
SwapQueue<RuntimeSetting> capture_runtime_settings_;
SwapQueue<RuntimeSetting> render_runtime_settings_;

View File

@ -108,11 +108,25 @@ rtc_library("metrics") {
]
}
# Library providing `DenormalDisabler`. The header is public and the target
# can be depended on from anywhere (`visibility = [ "*" ]`).
rtc_library("denormal_disabler") {
visibility = [ "*" ]
public = [ "include/denormal_disabler.h" ]
sources = [ "source/denormal_disabler.cc" ]
deps = [
"../rtc_base:checks",
"../rtc_base/system:arch",
]
# NOTE(review): presumably needed because on unsupported platforms the private
# members of `DenormalDisabler` are initialized but never read - confirm.
if (is_clang) {
cflags_cc = [ "-Wno-unused-private-field" ]
}
}
if (rtc_include_tests && !build_with_chromium) {
rtc_test("system_wrappers_unittests") {
testonly = true
sources = [
"source/clock_unittest.cc",
"source/denormal_disabler_unittest.cc",
"source/field_trial_unittest.cc",
"source/metrics_default_unittest.cc",
"source/metrics_unittest.cc",
@ -121,6 +135,7 @@ if (rtc_include_tests && !build_with_chromium) {
]
deps = [
":denormal_disabler",
":field_trial",
":metrics",
":system_wrappers",

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef SYSTEM_WRAPPERS_INCLUDE_DENORMAL_DISABLER_H_
#define SYSTEM_WRAPPERS_INCLUDE_DENORMAL_DISABLER_H_
#include "rtc_base/system/arch.h"
namespace webrtc {
// Scoped object that activates the hardware (HW) way to flush denormals (see
// [1]) to zero as they can very seriously impact performance. At destruction
// time restores the denormals handling state read by the ctor; hence, supports
// nested calls.
// Equals a no-op on unsupported platforms, i.e., when `IsSupported()` returns
// false. The current implementation supports ARM and, only when compiled with
// clang, x86.
// [1] https://en.wikipedia.org/wiki/Denormal_number
//
// Example usage:
//
// void Foo() {
//   DenormalDisabler d(/*enabled=*/true);
//   ...
// }
class DenormalDisabler {
public:
// Ctor. If `enabled` is true and architecture and compiler are supported,
// reads the HW settings for denormals; if denormals are currently enabled,
// keeps the settings that were read, disables denormals and sets
// `disabling_activated_` to true. Otherwise (including the case in which
// denormals are already disabled), only sets `disabling_activated_` to false
// and does not write the HW settings.
explicit DenormalDisabler(bool enabled);
// Non-copyable.
DenormalDisabler(const DenormalDisabler&) = delete;
DenormalDisabler& operator=(const DenormalDisabler&) = delete;
// Dtor. If `disabling_activated_` is true, restores the denormals HW settings
// read by the ctor before denormals were disabled. Otherwise it's a no-op.
~DenormalDisabler();
// Returns true if architecture and compiler are supported.
static bool IsSupported();
private:
// Value of the CPU control register read by the ctor, or a placeholder value
// if the register has not been read. Must be declared before
// `disabling_activated_`, whose initializer reads it.
const int status_word_;
// True if the ctor has changed the HW settings; the dtor then restores them.
const bool disabling_activated_;
};
} // namespace webrtc
#endif // SYSTEM_WRAPPERS_INCLUDE_DENORMAL_DISABLER_H_

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "system_wrappers/include/denormal_disabler.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// The x86 implementation is only compiled in when building with clang.
#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(__clang__)
#define WEBRTC_DENORMAL_DISABLER_X86_SUPPORTED
#endif
// Denormals can be disabled on x86 (clang only, see above) and on ARM
// (regardless of the compiler).
#if defined(WEBRTC_DENORMAL_DISABLER_X86_SUPPORTED) || \
defined(WEBRTC_ARCH_ARM_FAMILY)
#define WEBRTC_DENORMAL_DISABLER_SUPPORTED
#endif
// Placeholder used as status word when the CPU control register has not been
// read.
constexpr int kUnspecifiedStatusWord = -1;
#if defined(WEBRTC_DENORMAL_DISABLER_SUPPORTED)
// Control register bit mask to disable denormals on the hardware. Denormals
// are considered disabled only if all the bits in the mask are set.
#if defined(WEBRTC_DENORMAL_DISABLER_X86_SUPPORTED)
// On x86 two bits are used: flush-to-zero (FTZ) and denormals-are-zero (DAZ).
// 0x8040 corresponds to bits 15 and 6 of the status word.
constexpr int kDenormalBitMask = 0x8040;
#elif defined(WEBRTC_ARCH_ARM_FAMILY)
// On ARM one bit is used: flush-to-zero (FTZ), i.e., bit 24 of the status word.
constexpr int kDenormalBitMask = 1 << 24;
#endif
// Returns the current value of the CPU control register that holds the
// denormals settings: read via `stmxcsr` on x86, `vmrs` (FPSCR) on 32-bit ARM
// and `mrs` (FPCR) on 64-bit ARM. If none of the architecture specific branches
// is compiled in, returns `kUnspecifiedStatusWord`.
int ReadStatusWord() {
#if defined(WEBRTC_DENORMAL_DISABLER_X86_SUPPORTED)
  int mxcsr = kUnspecifiedStatusWord;
  asm volatile("stmxcsr %0" : "=m"(mxcsr));
  return mxcsr;
#elif defined(WEBRTC_ARCH_ARM_FAMILY) && defined(WEBRTC_ARCH_32_BITS)
  int fpscr = kUnspecifiedStatusWord;
  asm volatile("vmrs %[result], FPSCR" : [result] "=r"(fpscr));
  return fpscr;
#elif defined(WEBRTC_ARCH_ARM_FAMILY) && defined(WEBRTC_ARCH_64_BITS)
  int fpcr = kUnspecifiedStatusWord;
  asm volatile("mrs %x[result], FPCR" : [result] "=r"(fpcr));
  return fpcr;
#else
  // No known way to read the register for this configuration.
  return kUnspecifiedStatusWord;
#endif
}
// Writes `status_word` in the relevant CPU control register if the architecture
// and the compiler are supported: via `ldmxcsr` on x86, `vmsr` (FPSCR) on
// 32-bit ARM and `msr` (FPCR) on 64-bit ARM. If none of the branches below is
// compiled in, the function does nothing.
// NOTE(review): on 64-bit ARM the 32-bit `status_word` is passed through a
// 64-bit register operand (`%x`); presumably the upper half is irrelevant for
// FPCR - confirm.
void SetStatusWord(int status_word) {
#if defined(WEBRTC_DENORMAL_DISABLER_X86_SUPPORTED)
asm volatile("ldmxcsr %0" : : "m"(status_word));
#elif defined(WEBRTC_ARCH_ARM_FAMILY) && defined(WEBRTC_ARCH_32_BITS)
asm volatile("vmsr FPSCR, %[src]" : : [src] "r"(status_word));
#elif defined(WEBRTC_ARCH_ARM_FAMILY) && defined(WEBRTC_ARCH_64_BITS)
asm volatile("msr FPCR, %x[src]" : : [src] "r"(status_word));
#endif
}
// Returns true if `status_word` indicates that denormals are enabled, i.e., if
// at least one of the bits in `kDenormalBitMask` is not set.
constexpr bool DenormalsEnabled(int status_word) {
  return (~status_word & kDenormalBitMask) != 0;
}
#endif // defined(WEBRTC_DENORMAL_DISABLER_SUPPORTED)
} // namespace
#if defined(WEBRTC_DENORMAL_DISABLER_SUPPORTED)
// Supported platforms. When `enabled` is true, reads the CPU control register
// and, if denormals are currently enabled, sets the bits in `kDenormalBitMask`
// to disable them. When `enabled` is false or denormals are already disabled,
// the register is not written and the dtor has nothing to restore.
DenormalDisabler::DenormalDisabler(bool enabled)
    : status_word_(enabled ? ReadStatusWord() : kUnspecifiedStatusWord),
      disabling_activated_(enabled && DenormalsEnabled(status_word_)) {
  if (!disabling_activated_) {
    return;
  }
  RTC_DCHECK_NE(status_word_, kUnspecifiedStatusWord);
  // Keep all the other bits of the register as they were read.
  SetStatusWord(status_word_ | kDenormalBitMask);
  // Read the register back to verify that the write took effect.
  RTC_DCHECK(!DenormalsEnabled(ReadStatusWord()));
}
// Supported build (`WEBRTC_DENORMAL_DISABLER_SUPPORTED` is defined): always
// reports that denormals can be disabled.
bool DenormalDisabler::IsSupported() {
return true;
}
// Supported platforms. Writes back the control register value read by the
// ctor, but only if the ctor has changed the register.
DenormalDisabler::~DenormalDisabler() {
  if (!disabling_activated_) {
    return;
  }
  RTC_DCHECK_NE(status_word_, kUnspecifiedStatusWord);
  SetStatusWord(status_word_);
}
#else
// Fallback for unsupported architectures and compilers: the CPU control
// register is neither read nor written, hence `enabled` is ignored.
DenormalDisabler::DenormalDisabler(bool enabled)
: status_word_(kUnspecifiedStatusWord), disabling_activated_(false) {}
// Unsupported build: denormals cannot be disabled.
bool DenormalDisabler::IsSupported() {
return false;
}
// Nothing to restore since the ctor changes no HW settings.
DenormalDisabler::~DenormalDisabler() = default;
#endif
} // namespace webrtc

View File

@ -0,0 +1,146 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "system_wrappers/include/denormal_disabler.h"
#include <cmath>
#include <limits>
#include <vector>
#include "rtc_base/checks.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
// Smallest positive normalized `float`; any non-zero value with a smaller
// magnitude is a denormal.
constexpr float kSmallest = std::numeric_limits<float>::min();
// Float values such that, if used as divisors of `kSmallest`, the division
// produces a denormal or zero depending on whether denormals are enabled.
constexpr float kDenormalDivisors[] = {123.125f, 97.0f, 32.0f, 5.0f, 1.5f};
// Returns true if the result of `dividend` / `divisor` is a denormal, i.e., if
// it is non-zero and its magnitude is smaller than `kSmallest`.
// `dividend` and `divisor` must not be denormals (DCHECKed).
bool DivisionIsDenormal(float dividend, float divisor) {
  // Use the `float` overload of `std::fabs` rather than `std::fabsf`: not all
  // the standard library implementations declare `std::fabsf` in namespace
  // `std`.
  RTC_DCHECK_GE(std::fabs(dividend), kSmallest);
  RTC_DCHECK_GE(std::fabs(divisor), kSmallest);
  // NOTE(review): `volatile` is presumably used to prevent the compiler from
  // optimizing the division away - confirm.
  volatile float division = dividend / divisor;
  return division != 0.0f && std::fabs(division) < kSmallest;
}
} // namespace
// Fixture for the tests that must pass regardless of whether `DenormalDisabler`
// is switched on. The bool test parameter is passed to the `DenormalDisabler`
// ctor as `enabled`.
class DenormalDisablerParametrization : public ::testing::TestWithParam<bool> {
};
// Verifies that overflowing to +inf and -inf still yields an infinite value,
// both with the disabler switched on and off and regardless of whether
// architecture and compiler are supported.
TEST_P(DenormalDisablerParametrization, InfNotZeroed) {
  const bool disabler_enabled = GetParam();
  DenormalDisabler denormal_disabler(disabler_enabled);
  constexpr float kLargestFinite = std::numeric_limits<float>::max();
  // Multiplying the largest finite value by these factors overflows in both
  // directions.
  constexpr float kFactors[] = {-2.0f, 2.0f};
  for (const float x : kFactors) {
    SCOPED_TRACE(x);
    volatile float multiplication = kLargestFinite * x;
    EXPECT_TRUE(std::isinf(multiplication));
  }
}
// Checks that a NaN is not zeroed regardless of whether architecture and
// compiler are supported.
TEST_P(DenormalDisablerParametrization, NanNotZeroed) {
// The test parameter selects whether the disabler is switched on.
DenormalDisabler denormal_disabler(/*enabled=*/GetParam());
// The square root of a negative number is expected to be a NaN.
volatile float kNan = std::sqrt(-1.0f);
EXPECT_TRUE(std::isnan(kNan));
}
// Runs the parametrized tests twice: with `DenormalDisabler` switched off
// (test name suffix "disabled") and switched on (test name suffix "enabled").
INSTANTIATE_TEST_SUITE_P(DenormalDisabler,
DenormalDisablerParametrization,
::testing::Values(false, true),
[](const ::testing::TestParamInfo<bool>& info) {
return info.param ? "enabled" : "disabled";
});
// Checks that denormals are not zeroed if `DenormalDisabler` is disabled and
// architecture and compiler are supported.
TEST(DenormalDisabler, DoNotZeroDenormalsIfDisabled) {
if (!DenormalDisabler::IsSupported()) {
GTEST_SKIP() << "Unsupported platform.";
}
// If denormals are already zeroed before the disabler is created, the
// expectations below cannot hold; fail early with an explicit message.
ASSERT_TRUE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]))
<< "Precondition not met: denormals must be enabled.";
// Created with `enabled` set to false: denormals must not be affected.
DenormalDisabler denormal_disabler(/*enabled=*/false);
for (float x : kDenormalDivisors) {
SCOPED_TRACE(x);
// Check both negative and positive denormals.
EXPECT_TRUE(DivisionIsDenormal(-kSmallest, x));
EXPECT_TRUE(DivisionIsDenormal(kSmallest, x));
}
}
// Checks that denormals are zeroed if `DenormalDisabler` is enabled if
// architecture and compiler are supported.
TEST(DenormalDisabler, ZeroDenormals) {
if (!DenormalDisabler::IsSupported()) {
GTEST_SKIP() << "Unsupported platform.";
}
// Denormals stay disabled until the end of the test body, where
// `denormal_disabler` goes out of scope.
DenormalDisabler denormal_disabler(/*enabled=*/true);
for (float x : kDenormalDivisors) {
SCOPED_TRACE(x);
// Check both negative and positive results: none must be a denormal.
EXPECT_FALSE(DivisionIsDenormal(-kSmallest, x));
EXPECT_FALSE(DivisionIsDenormal(kSmallest, x));
}
}
// Checks that the `DenormalDisabler` dtor re-enables denormals if previously
// enabled and architecture and compiler are supported.
TEST(DenormalDisabler, RestoreDenormalsEnabled) {
if (!DenormalDisabler::IsSupported()) {
GTEST_SKIP() << "Unsupported platform.";
}
// The test is only meaningful if denormals are enabled to begin with.
ASSERT_TRUE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]))
<< "Precondition not met: denormals must be enabled.";
{
// Denormals are disabled only within this scope.
DenormalDisabler denormal_disabler(/*enabled=*/true);
ASSERT_FALSE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]));
}
// The dtor has run: the same division must produce a denormal again.
EXPECT_TRUE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]));
}
// Checks that the `DenormalDisabler` dtor keeps denormals disabled if
// architecture and compiler are supported and if previously disabled - i.e.,
// nested usage is supported.
TEST(DenormalDisabler, ZeroDenormalsNested) {
if (!DenormalDisabler::IsSupported()) {
GTEST_SKIP() << "Unsupported platform.";
}
// Outer disabler: alive until the end of the test body.
DenormalDisabler d1(/*enabled=*/true);
ASSERT_FALSE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]));
{
// Inner disabler: created while denormals are already disabled by `d1`.
DenormalDisabler d2(/*enabled=*/true);
ASSERT_FALSE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]));
}
// Destroying `d2` must not re-enable denormals since `d1` is still alive.
EXPECT_FALSE(DivisionIsDenormal(kSmallest, kDenormalDivisors[0]));
}
// Checks that `DenormalDisabler` does not zero denormals if architecture and
// compiler are not supported.
TEST(DenormalDisabler, DoNotZeroDenormalsIfUnsupported) {
// This test only applies to unsupported platforms; on supported ones it
// returns early (and is reported as passed rather than skipped).
if (DenormalDisabler::IsSupported()) {
// TODO(bugs.webrtc.org/13057): Use GTEST_SKIP() once fixed.
return;
}
// Even if created with `enabled` set to true, denormals must not be affected.
DenormalDisabler denormal_disabler(/*enabled=*/true);
for (float x : kDenormalDivisors) {
SCOPED_TRACE(x);
// Check both negative and positive denormals.
EXPECT_TRUE(DivisionIsDenormal(-kSmallest, x));
EXPECT_TRUE(DivisionIsDenormal(kSmallest, x));
}
}
} // namespace webrtc