Adds multi-channel support to new ADM2 on Windows.
Now checks the preferred channel configuration and requests implicit channel upmixing (audio engine extends from 2 to N channels internally) if the preferred number of channels is larger than two; i.e., initialize the stream in stereo even if the preferred configuration is multi-channel. To summarize: with this CL, it is now possible to use e.g. a 7.1 headset with a native WebRTC client. All internal processing in WebRTC will be in stereo, and the audio device will be opened up in stereo as well to match WebRTC. Before this change, we would open up the audio device using 8 channels but that was not supported by WebRTC. Bug: webrtc:9265 Change-Id: I1530fee28c4b8b5cda29ab6baf8d65fd391d935d Reviewed-on: https://webrtc-review.googlesource.com/98421 Commit-Queue: Henrik Andreassson <henrika@webrtc.org> Reviewed-by: Oskar Sundbom <ossu@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24743}
This commit is contained in:
@ -126,7 +126,7 @@ int IndexToMilliseconds(size_t index, size_t frames_per_10ms_buffer) {
|
||||
// The container is a std::list container and access is protected with a lock
|
||||
// since both sides (playout and recording) are driven by its own thread.
|
||||
// Note that, we know by design that the size of the audio buffer will not
|
||||
// change over time and that both sides will use the same size.
|
||||
// change over time and that both sides will in most cases use the same size.
|
||||
class FifoAudioStream : public AudioStream {
|
||||
public:
|
||||
void Write(rtc::ArrayView<const int16_t> source) override {
|
||||
@ -152,8 +152,27 @@ class FifoAudioStream : public AudioStream {
|
||||
std::fill(destination.begin(), destination.end(), 0);
|
||||
} else {
|
||||
const Buffer16& buffer = fifo_.front();
|
||||
RTC_CHECK_EQ(buffer.size(), destination.size());
|
||||
if (buffer.size() == destination.size()) {
|
||||
// Default case where input and output uses same sample rate and
|
||||
// channel configuration. No conversion is needed.
|
||||
std::copy(buffer.begin(), buffer.end(), destination.begin());
|
||||
} else if (destination.size() == 2 * buffer.size()) {
|
||||
// Recorded input signal in |buffer| is in mono. Do channel upmix to
|
||||
// match stereo output (1 -> 2).
|
||||
for (size_t i = 0; i < buffer.size(); ++i) {
|
||||
destination[2 * i] = buffer[i];
|
||||
destination[2 * i + 1] = buffer[i];
|
||||
}
|
||||
} else if (buffer.size() == 2 * destination.size()) {
|
||||
// Recorded input signal in |buffer| is in stereo. Do channel downmix
|
||||
// to match mono output (2 -> 1).
|
||||
for (size_t i = 0; i < destination.size(); ++i) {
|
||||
destination[i] =
|
||||
(static_cast<int32_t>(buffer[2 * i]) + buffer[2 * i + 1]) / 2;
|
||||
}
|
||||
} else {
|
||||
RTC_NOTREACHED() << "Required conversion is not support";
|
||||
}
|
||||
fifo_.pop_front();
|
||||
}
|
||||
}
|
||||
@ -1060,10 +1079,10 @@ TEST_P(AudioDeviceTest, DISABLED_MeasureLoopbackLatency) {
|
||||
std::max(kTestTimeOutInMilliseconds, 1000 * kMeasureLatencyTimeInSec)));
|
||||
StopRecording();
|
||||
StopPlayout();
|
||||
// Verify that the correct number of transmitted impulses are detected.
|
||||
EXPECT_EQ(audio_stream.num_latency_values(),
|
||||
// Verify that a sufficient number of transmitted impulses are detected.
|
||||
EXPECT_GE(audio_stream.num_latency_values(),
|
||||
static_cast<size_t>(
|
||||
kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 1));
|
||||
kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 2));
|
||||
// Print out min, max and average delay values for debugging purposes.
|
||||
audio_stream.PrintResults();
|
||||
}
|
||||
|
@ -362,7 +362,21 @@ bool CoreAudioBase::Init() {
|
||||
// Define the output WAVEFORMATEXTENSIBLE format in |format_|.
|
||||
WAVEFORMATEX* format = &format_.Format;
|
||||
format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
|
||||
// Check the preferred channel configuration and request implicit channel
|
||||
// upmixing (audio engine extends from 2 to N channels internally) if the
|
||||
// preferred number of channels is larger than two; i.e., initialize the
|
||||
// stream in stereo even if the preferred configuration is multi-channel.
|
||||
if (params.channels() <= 2) {
|
||||
format->nChannels = rtc::dchecked_cast<WORD>(params.channels());
|
||||
} else {
|
||||
// TODO(henrika): ensure that this approach works on different multi-channel
|
||||
// devices. Verified on:
|
||||
// - Corsair VOID PRO Surround USB Adapter (supports 7.1)
|
||||
RTC_LOG(LS_WARNING)
|
||||
<< "Using channel upmixing in WASAPI audio engine (2 => "
|
||||
<< params.channels() << ")";
|
||||
format->nChannels = 2;
|
||||
}
|
||||
format->nSamplesPerSec = params.sample_rate();
|
||||
format->wBitsPerSample = rtc::dchecked_cast<WORD>(params.bits_per_sample());
|
||||
format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
|
||||
@ -371,10 +385,8 @@ bool CoreAudioBase::Init() {
|
||||
// Add the parts which are unique for the WAVE_FORMAT_EXTENSIBLE structure.
|
||||
format_.Samples.wValidBitsPerSample =
|
||||
rtc::dchecked_cast<WORD>(params.bits_per_sample());
|
||||
// TODO(henrika): improve (common for input and output?)
|
||||
format_.dwChannelMask = params.channels() == 1
|
||||
? SPEAKER_FRONT_CENTER
|
||||
: SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
|
||||
format_.dwChannelMask =
|
||||
format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO;
|
||||
format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
|
||||
RTC_DLOG(INFO) << core_audio_utility::WaveFormatExToString(&format_);
|
||||
|
||||
@ -481,10 +493,8 @@ bool CoreAudioBase::Init() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Store valid COM interface.
|
||||
if (audio_client) {
|
||||
// Store valid COM interfaces.
|
||||
audio_client_ = audio_client;
|
||||
}
|
||||
audio_session_control_ = audio_session_control;
|
||||
|
||||
return true;
|
||||
|
@ -37,6 +37,149 @@ namespace {
|
||||
|
||||
using core_audio_utility::ErrorToString;
|
||||
|
||||
// Converts from channel mask to list of included channels.
|
||||
// Each audio data format contains channels for one or more of the positions
|
||||
// listed below. The number of channels simply equals the number of nonzero
|
||||
// flag bits in the |channel_mask|. The relative positions of the channels
|
||||
// within each block of audio data always follow the same relative ordering
|
||||
// as the flag bits in the table below. For example, if |channel_mask| contains
|
||||
// the value 0x00000033, the format defines four audio channels that are
|
||||
// assigned for playback to the front-left, front-right, back-left,
|
||||
// and back-right speakers, respectively. The channel data should be interleaved
|
||||
// in that order within each block.
|
||||
std::string ChannelMaskToString(DWORD channel_mask) {
  // Table of all speaker-position bits in the exact relative order defined by
  // WAVEFORMATEXTENSIBLE::dwChannelMask (ksmedia.h). The output must list
  // channels in this order since it matches the interleaving order of the
  // audio data blocks.
  struct SpeakerFlag {
    DWORD bit;
    const char* name;
  };
  static const SpeakerFlag kSpeakerFlags[] = {
      {SPEAKER_FRONT_LEFT, "FRONT_LEFT"},
      {SPEAKER_FRONT_RIGHT, "FRONT_RIGHT"},
      {SPEAKER_FRONT_CENTER, "FRONT_CENTER"},
      {SPEAKER_LOW_FREQUENCY, "LOW_FREQUENCY"},
      {SPEAKER_BACK_LEFT, "BACK_LEFT"},
      {SPEAKER_BACK_RIGHT, "BACK_RIGHT"},
      {SPEAKER_FRONT_LEFT_OF_CENTER, "FRONT_LEFT_OF_CENTER"},
      // Fixed label: was "RIGHT_OF_CENTER", inconsistent with the
      // "FRONT_LEFT_OF_CENTER" label for the mirrored position.
      {SPEAKER_FRONT_RIGHT_OF_CENTER, "FRONT_RIGHT_OF_CENTER"},
      {SPEAKER_BACK_CENTER, "BACK_CENTER"},
      {SPEAKER_SIDE_LEFT, "SIDE_LEFT"},
      {SPEAKER_SIDE_RIGHT, "SIDE_RIGHT"},
      {SPEAKER_TOP_CENTER, "TOP_CENTER"},
      {SPEAKER_TOP_FRONT_LEFT, "TOP_FRONT_LEFT"},
      {SPEAKER_TOP_FRONT_CENTER, "TOP_FRONT_CENTER"},
      {SPEAKER_TOP_FRONT_RIGHT, "TOP_FRONT_RIGHT"},
      {SPEAKER_TOP_BACK_LEFT, "TOP_BACK_LEFT"},
      {SPEAKER_TOP_BACK_CENTER, "TOP_BACK_CENTER"},
      {SPEAKER_TOP_BACK_RIGHT, "TOP_BACK_RIGHT"},
  };

  std::string ss;
  int n = 0;
  for (const SpeakerFlag& flag : kSpeakerFlags) {
    if (channel_mask & flag.bit) {
      // Insert the separator between entries instead of appending a trailing
      // " | " that must be erased afterwards.
      if (n > 0) {
        ss += " | ";
      }
      ss += flag.name;
      ++n;
    }
  }
  // Append the channel count, e.g. "FRONT_LEFT | FRONT_RIGHT (2)".
  ss += " (";
  ss += std::to_string(n);
  ss += ")";
  return ss;
}
|
||||
|
||||
// Converts from channel mask to DirectSound speaker configuration.
|
||||
// The values below are copied from ksmedia.h.
|
||||
// Example: KSAUDIO_SPEAKER_STEREO = (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT).
|
||||
const char* DirectSoundConfigToString(DWORD channel_mask) {
  // Map an exact channel-mask value to the name of the corresponding
  // DirectSound speaker configuration. The KSAUDIO_SPEAKER_* constants are
  // distinct bit combinations, so a first-match if-chain is equivalent to a
  // switch on |channel_mask|.
  if (channel_mask == KSAUDIO_SPEAKER_DIRECTOUT)
    return "KSAUDIO_DIRECTOUT";
  // Front center (C)
  if (channel_mask == KSAUDIO_SPEAKER_MONO)
    return "KSAUDIO_MONO";
  if (channel_mask == KSAUDIO_SPEAKER_1POINT1)
    return "KSAUDIO_1POINT1";
  // Front left (L), front right (R).
  if (channel_mask == KSAUDIO_SPEAKER_STEREO)
    return "KSAUDIO_STEREO";
  if (channel_mask == KSAUDIO_SPEAKER_2POINT1)
    return "KSAUDIO_2POINT1";
  if (channel_mask == KSAUDIO_SPEAKER_3POINT0)
    return "KSAUDIO_3POINT0";
  if (channel_mask == KSAUDIO_SPEAKER_3POINT1)
    return "KSAUDIO_3POINT1";
  // L, R, back left (Lb), back right (Rb).
  if (channel_mask == KSAUDIO_SPEAKER_QUAD)
    return "KSAUDIO_QUAD";
  // L, R, front center (C), back center (Cb).
  if (channel_mask == KSAUDIO_SPEAKER_SURROUND)
    return "KSAUDIO_SURROUND";
  if (channel_mask == KSAUDIO_SPEAKER_5POINT0)
    return "KSAUDIO_5POINT0";
  if (channel_mask == KSAUDIO_SPEAKER_5POINT1)
    return "KSAUDIO_5POINT1";
  if (channel_mask == KSAUDIO_SPEAKER_7POINT0)
    return "KSAUDIO_7POINT0";
  // L, R, C, Lb, Rb, front left-of-center, front right-of-center, LFE.
  if (channel_mask == KSAUDIO_SPEAKER_7POINT1)
    return "KSAUDIO_7POINT1";
  // L, R, C, side left (Ls), side right (Rs), LFE.
  if (channel_mask == KSAUDIO_SPEAKER_5POINT1_SURROUND)
    return "KSAUDIO_5POINT1_SURROUND";
  // L, R, C, Lb, Rb, Ls, Rs, LFE.
  if (channel_mask == KSAUDIO_SPEAKER_7POINT1_SURROUND)
    return "KSAUDIO_7POINT1_SURROUND";
  // Any other combination does not correspond to a named configuration.
  return "KSAUDIO_INVALID";
}
|
||||
|
||||
bool LoadAudiosesDll() {
|
||||
static const wchar_t* const kAudiosesDLL =
|
||||
L"%WINDIR%\\system32\\audioses.dll";
|
||||
@ -394,7 +537,6 @@ HRESULT GetPreferredAudioParametersInternal(IAudioClient* client,
|
||||
// const size_t bits_per_sample = AudioParameters::kBitsPerSample;
|
||||
// TODO(henrika): improve channel layout support.
|
||||
const size_t channels = mix_format.Format.nChannels;
|
||||
RTC_DCHECK_LE(channels, 2);
|
||||
|
||||
// Use the native device period to derive the smallest possible buffer size
|
||||
// in shared mode.
|
||||
@ -669,8 +811,17 @@ HRESULT SetClientProperties(IAudioClient2* client) {
|
||||
// TODO(henrika): pros and cons compared with AUDCLNT_STREAMOPTIONS_NONE?
|
||||
props.Options |= AUDCLNT_STREAMOPTIONS_NONE;
|
||||
// Requires System.Devices.AudioDevice.RawProcessingSupported.
|
||||
// The application can choose to *always ignore* the OEM AEC/AGC by setting
|
||||
// the AUDCLNT_STREAMOPTIONS_RAW flag in the call to SetClientProperties.
|
||||
// This flag will preserve the user experience aspect of Communications
|
||||
// streams, but will not insert any OEM provided communications specific
|
||||
// processing in the audio signal path.
|
||||
// props.Options |= AUDCLNT_STREAMOPTIONS_RAW;
|
||||
|
||||
// If it is important to avoid resampling in the audio engine, set this flag.
|
||||
// AUDCLNT_STREAMOPTIONS_MATCH_FORMAT (or anything in IAudioClient3) is not
|
||||
// an appropriate interface to use for communications scenarios.
|
||||
// This interface is mainly meant for pro audio scenarios.
|
||||
// props.Options |= AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
|
||||
RTC_DLOG(INFO) << "options: 0x" << rtc::ToHex(props.Options);
|
||||
error = client->SetClientProperties(&props);
|
||||
@ -1001,7 +1152,7 @@ HRESULT SharedModeInitializeLowLatency(IAudioClient3* client,
|
||||
}
|
||||
|
||||
// Define stream flags.
|
||||
DWORD stream_flags = 0;
|
||||
DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST;
|
||||
bool use_event =
|
||||
(event_handle != nullptr && event_handle != INVALID_HANDLE_VALUE);
|
||||
if (use_event) {
|
||||
@ -1215,6 +1366,10 @@ std::string WaveFormatExToString(const WAVEFORMATEXTENSIBLE* format) {
|
||||
} else {
|
||||
ss << ", SubFormat: NOT_SUPPORTED";
|
||||
}
|
||||
ss.AppendFormat("\nChannel configuration: %s",
|
||||
ChannelMaskToString(format->dwChannelMask).c_str());
|
||||
ss.AppendFormat("\nDirectSound configuration : %s",
|
||||
DirectSoundConfigToString(format->dwChannelMask));
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user