diff --git a/modules/audio_device/audio_device_unittest.cc b/modules/audio_device/audio_device_unittest.cc index 3f2a3f3ddc..48cb43273c 100644 --- a/modules/audio_device/audio_device_unittest.cc +++ b/modules/audio_device/audio_device_unittest.cc @@ -126,7 +126,7 @@ int IndexToMilliseconds(size_t index, size_t frames_per_10ms_buffer) { // The container is a std::list container and access is protected with a lock // since both sides (playout and recording) are driven by its own thread. // Note that, we know by design that the size of the audio buffer will not -// change over time and that both sides will use the same size. +// change over time and that both sides will in most cases use the same size. class FifoAudioStream : public AudioStream { public: void Write(rtc::ArrayView source) override { @@ -152,8 +152,27 @@ class FifoAudioStream : public AudioStream { std::fill(destination.begin(), destination.end(), 0); } else { const Buffer16& buffer = fifo_.front(); - RTC_CHECK_EQ(buffer.size(), destination.size()); - std::copy(buffer.begin(), buffer.end(), destination.begin()); + if (buffer.size() == destination.size()) { + // Default case where input and output uses same sample rate and + // channel configuration. No conversion is needed. + std::copy(buffer.begin(), buffer.end(), destination.begin()); + } else if (destination.size() == 2 * buffer.size()) { + // Recorded input signal in |buffer| is in mono. Do channel upmix to + // match stereo output (1 -> 2). + for (size_t i = 0; i < buffer.size(); ++i) { + destination[2 * i] = buffer[i]; + destination[2 * i + 1] = buffer[i]; + } + } else if (buffer.size() == 2 * destination.size()) { + // Recorded input signal in |buffer| is in stereo. Do channel downmix + // to match mono output (2 -> 1). + for (size_t i = 0; i < destination.size(); ++i) { + destination[i] = + (static_cast(buffer[2 * i]) + buffer[2 * i + 1]) / 2; + } + } else { + RTC_NOTREACHED() << "Required conversion is not support"; + } fifo_.pop_front(); } } @@ -1060,10 +1079,10 @@ TEST_P(AudioDeviceTest, DISABLED_MeasureLoopbackLatency) { std::max(kTestTimeOutInMilliseconds, 1000 * kMeasureLatencyTimeInSec))); StopRecording(); StopPlayout(); - // Verify that the correct number of transmitted impulses are detected. - EXPECT_EQ(audio_stream.num_latency_values(), + // Verify that a sufficient number of transmitted impulses are detected. + EXPECT_GE(audio_stream.num_latency_values(), static_cast( - kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 1)); + kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 2)); // Print out min, max and average delay values for debugging purposes. audio_stream.PrintResults(); } diff --git a/modules/audio_device/win/core_audio_base_win.cc b/modules/audio_device/win/core_audio_base_win.cc index 2e6e9fa598..43e7bba607 100644 --- a/modules/audio_device/win/core_audio_base_win.cc +++ b/modules/audio_device/win/core_audio_base_win.cc @@ -362,7 +362,21 @@ bool CoreAudioBase::Init() { // Define the output WAVEFORMATEXTENSIBLE format in |format_|. WAVEFORMATEX* format = &format_.Format; format->wFormatTag = WAVE_FORMAT_EXTENSIBLE; - format->nChannels = rtc::dchecked_cast(params.channels()); + // Check the preferred channel configuration and request implicit channel + // upmixing (audio engine extends from 2 to N channels internally) if the + // preferred number of channels is larger than two; i.e., initialize the + // stream in stereo even if the preferred configuration is multi-channel. + if (params.channels() <= 2) { + format->nChannels = rtc::dchecked_cast(params.channels()); + } else { + // TODO(henrika): ensure that this approach works on different multi-channel + // devices. Verified on: + // - Corsair VOID PRO Surround USB Adapter (supports 7.1) + RTC_LOG(LS_WARNING) + << "Using channel upmixing in WASAPI audio engine (2 => " + << params.channels() << ")"; + format->nChannels = 2; + } format->nSamplesPerSec = params.sample_rate(); format->wBitsPerSample = rtc::dchecked_cast(params.bits_per_sample()); format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels; @@ -371,10 +385,8 @@ bool CoreAudioBase::Init() { // Add the parts which are unique for the WAVE_FORMAT_EXTENSIBLE structure. format_.Samples.wValidBitsPerSample = rtc::dchecked_cast(params.bits_per_sample()); - // TODO(henrika): improve (common for input and output?) - format_.dwChannelMask = params.channels() == 1 - ? SPEAKER_FRONT_CENTER - : SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; + format_.dwChannelMask = + format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO; format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; RTC_DLOG(INFO) << core_audio_utility::WaveFormatExToString(&format_); @@ -481,10 +493,8 @@ bool CoreAudioBase::Init() { return false; } - // Store valid COM interface. - if (audio_client) { - audio_client_ = audio_client; - } + // Store valid COM interfaces. + audio_client_ = audio_client; audio_session_control_ = audio_session_control; return true; diff --git a/modules/audio_device/win/core_audio_utility_win.cc b/modules/audio_device/win/core_audio_utility_win.cc index 040672a3ce..a19ab77dc2 100644 --- a/modules/audio_device/win/core_audio_utility_win.cc +++ b/modules/audio_device/win/core_audio_utility_win.cc @@ -37,6 +37,149 @@ namespace { using core_audio_utility::ErrorToString; +// Converts from channel mask to list of included channels. +// Each audio data format contains channels for one or more of the positions +// listed below. The number of channels simply equals the number of nonzero +// flag bits in the |channel_mask|. The relative positions of the channels +// within each block of audio data always follow the same relative ordering +// as the flag bits in the table below. For example, if |channel_mask| contains +// the value 0x00000033, the format defines four audio channels that are +// assigned for playback to the front-left, front-right, back-left, +// and back-right speakers, respectively. The channel data should be interleaved +// in that order within each block. +std::string ChannelMaskToString(DWORD channel_mask) { + std::string ss; + int n = 0; + if (channel_mask & SPEAKER_FRONT_LEFT) { + ss += "FRONT_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_RIGHT) { + ss += "FRONT_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_CENTER) { + ss += "FRONT_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_LOW_FREQUENCY) { + ss += "LOW_FREQUENCY | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_LEFT) { + ss += "BACK_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_RIGHT) { + ss += "BACK_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_LEFT_OF_CENTER) { + ss += "FRONT_LEFT_OF_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_FRONT_RIGHT_OF_CENTER) { + ss += "RIGHT_OF_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_BACK_CENTER) { + ss += "BACK_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_SIDE_LEFT) { + ss += "SIDE_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_SIDE_RIGHT) { + ss += "SIDE_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_CENTER) { + ss += "TOP_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_LEFT) { + ss += "TOP_FRONT_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_CENTER) { + ss += "TOP_FRONT_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_FRONT_RIGHT) { + ss += "TOP_FRONT_RIGHT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_LEFT) { + ss += "TOP_BACK_LEFT | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_CENTER) { + ss += "TOP_BACK_CENTER | "; + ++n; + } + if (channel_mask & SPEAKER_TOP_BACK_RIGHT) { + ss += "TOP_BACK_RIGHT | "; + ++n; + } + + if (!ss.empty()) { + // Delete last appended " | " substring. + ss.erase(ss.end() - 3, ss.end()); + } + ss += " ("; + ss += std::to_string(n); + ss += ")"; + return ss; +} + +// Converts from channel mask to DirectSound speaker configuration. +// The values below are copied from ksmedia.h. +// Example: KSAUDIO_SPEAKER_STEREO = (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT). +const char* DirectSoundConfigToString(DWORD channel_mask) { + switch (channel_mask) { + case KSAUDIO_SPEAKER_DIRECTOUT: + return "KSAUDIO_DIRECTOUT"; + case KSAUDIO_SPEAKER_MONO: + // Front center (C) + return "KSAUDIO_MONO"; + case KSAUDIO_SPEAKER_1POINT1: + return "KSAUDIO_1POINT1"; + case KSAUDIO_SPEAKER_STEREO: + // Front left (L), front right (R). + return "KSAUDIO_STEREO"; + case KSAUDIO_SPEAKER_2POINT1: + return "KSAUDIO_2POINT1"; + case KSAUDIO_SPEAKER_3POINT0: + return "KSAUDIO_3POINT0"; + case KSAUDIO_SPEAKER_3POINT1: + return "KSAUDIO_3POINT1"; + case KSAUDIO_SPEAKER_QUAD: + // L, R, back left (Lb), back right (Rb). + return "KSAUDIO_QUAD"; + case KSAUDIO_SPEAKER_SURROUND: + // L, R, front center (C), back center (Cb). + return "KSAUDIO_SURROUND"; + case KSAUDIO_SPEAKER_5POINT0: + return "KSAUDIO_5POINT0"; + case KSAUDIO_SPEAKER_5POINT1: + return "KSAUDIO_5POINT1"; + case KSAUDIO_SPEAKER_7POINT0: + return "KSAUDIO_7POINT0"; + case KSAUDIO_SPEAKER_7POINT1: + // L, R, C, Lb, Rb, front left-of-center, front right-of-center, LFE. + return "KSAUDIO_7POINT1"; + case KSAUDIO_SPEAKER_5POINT1_SURROUND: + // L, R, C, side left (Ls), side right (Rs), LFE. + return "KSAUDIO_5POINT1_SURROUND"; + case KSAUDIO_SPEAKER_7POINT1_SURROUND: + // L, R, C, Lb, Rb, Ls, Rs, LFE. + return "KSAUDIO_7POINT1_SURROUND"; + default: + return "KSAUDIO_INVALID"; + } +} + bool LoadAudiosesDll() { static const wchar_t* const kAudiosesDLL = L"%WINDIR%\\system32\\audioses.dll"; @@ -394,7 +537,6 @@ HRESULT GetPreferredAudioParametersInternal(IAudioClient* client, // const size_t bits_per_sample = AudioParameters::kBitsPerSample; // TODO(henrika): improve channel layout support. const size_t channels = mix_format.Format.nChannels; - RTC_DCHECK_LE(channels, 2); // Use the native device period to derive the smallest possible buffer size // in shared mode. @@ -669,8 +811,17 @@ HRESULT SetClientProperties(IAudioClient2* client) { // TODO(henrika): pros and cons compared with AUDCLNT_STREAMOPTIONS_NONE? props.Options |= AUDCLNT_STREAMOPTIONS_NONE; // Requires System.Devices.AudioDevice.RawProcessingSupported. + // The application can choose to *always ignore* the OEM AEC/AGC by setting + // the AUDCLNT_STREAMOPTIONS_RAW flag in the call to SetClientProperties. + // This flag will preserve the user experience aspect of Communications + // streams, but will not insert any OEM provided communications specific + // processing in the audio signal path. // props.Options |= AUDCLNT_STREAMOPTIONS_RAW; + // If it is important to avoid resampling in the audio engine, set this flag. + // AUDCLNT_STREAMOPTIONS_MATCH_FORMAT (or anything in IAudioClient3) is not + // an appropriate interface to use for communications scenarios. + // This interface is mainly meant for pro audio scenarios. // props.Options |= AUDCLNT_STREAMOPTIONS_MATCH_FORMAT; RTC_DLOG(INFO) << "options: 0x" << rtc::ToHex(props.Options); error = client->SetClientProperties(&props); @@ -1001,7 +1152,7 @@ HRESULT SharedModeInitializeLowLatency(IAudioClient3* client, } // Define stream flags. - DWORD stream_flags = 0; + DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST; bool use_event = (event_handle != nullptr && event_handle != INVALID_HANDLE_VALUE); if (use_event) { @@ -1215,6 +1366,10 @@ std::string WaveFormatExToString(const WAVEFORMATEXTENSIBLE* format) { } else { ss << ", SubFormat: NOT_SUPPORTED"; } + ss.AppendFormat("\nChannel configuration: %s", + ChannelMaskToString(format->dwChannelMask).c_str()); + ss.AppendFormat("\nDirectSound configuration : %s", + DirectSoundConfigToString(format->dwChannelMask)); return ss.str(); }