diff --git a/modules/audio_device/audio_device_unittest.cc b/modules/audio_device/audio_device_unittest.cc
index 3f2a3f3ddc..48cb43273c 100644
--- a/modules/audio_device/audio_device_unittest.cc
+++ b/modules/audio_device/audio_device_unittest.cc
@@ -126,7 +126,7 @@ int IndexToMilliseconds(size_t index, size_t frames_per_10ms_buffer) {
 // The container is a std::list container and access is protected with a lock
 // since both sides (playout and recording) are driven by its own thread.
 // Note that, we know by design that the size of the audio buffer will not
-// change over time and that both sides will use the same size.
+// change over time and that both sides will in most cases use the same size.
 class FifoAudioStream : public AudioStream {
  public:
   void Write(rtc::ArrayView<const int16_t> source) override {
@@ -152,8 +152,27 @@ class FifoAudioStream : public AudioStream {
       std::fill(destination.begin(), destination.end(), 0);
     } else {
       const Buffer16& buffer = fifo_.front();
-      RTC_CHECK_EQ(buffer.size(), destination.size());
-      std::copy(buffer.begin(), buffer.end(), destination.begin());
+      if (buffer.size() == destination.size()) {
+        // Default case where input and output use the same sample rate and
+        // channel configuration. No conversion is needed.
+        std::copy(buffer.begin(), buffer.end(), destination.begin());
+      } else if (destination.size() == 2 * buffer.size()) {
+        // Recorded input signal in |buffer| is in mono. Do channel upmix to
+        // match stereo output (1 -> 2).
+        for (size_t i = 0; i < buffer.size(); ++i) {
+          destination[2 * i] = buffer[i];
+          destination[2 * i + 1] = buffer[i];
+        }
+      } else if (buffer.size() == 2 * destination.size()) {
+        // Recorded input signal in |buffer| is in stereo. Do channel downmix
+        // to match mono output (2 -> 1).
+        for (size_t i = 0; i < destination.size(); ++i) {
+          destination[i] =
+              (static_cast<int32_t>(buffer[2 * i]) + buffer[2 * i + 1]) / 2;
+        }
+      } else {
+        RTC_NOTREACHED() << "Required conversion is not support";
+      }
       fifo_.pop_front();
     }
   }
@@ -1060,10 +1079,10 @@ TEST_P(AudioDeviceTest, DISABLED_MeasureLoopbackLatency) {
       std::max(kTestTimeOutInMilliseconds, 1000 * kMeasureLatencyTimeInSec)));
   StopRecording();
   StopPlayout();
-  // Verify that the correct number of transmitted impulses are detected.
-  EXPECT_EQ(audio_stream.num_latency_values(),
+  // Verify that a sufficient number of transmitted impulses are detected.
+  EXPECT_GE(audio_stream.num_latency_values(),
             static_cast<size_t>(
-                kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 1));
+                kImpulseFrequencyInHz * kMeasureLatencyTimeInSec - 2));
   // Print out min, max and average delay values for debugging purposes.
   audio_stream.PrintResults();
 }
diff --git a/modules/audio_device/win/core_audio_base_win.cc b/modules/audio_device/win/core_audio_base_win.cc
index 2e6e9fa598..43e7bba607 100644
--- a/modules/audio_device/win/core_audio_base_win.cc
+++ b/modules/audio_device/win/core_audio_base_win.cc
@@ -362,7 +362,21 @@ bool CoreAudioBase::Init() {
   // Define the output WAVEFORMATEXTENSIBLE format in |format_|.
   WAVEFORMATEX* format = &format_.Format;
   format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
-  format->nChannels = rtc::dchecked_cast<WORD>(params.channels());
+  // Check the preferred channel configuration and request implicit channel
+  // upmixing (audio engine extends from 2 to N channels internally) if the
+  // preferred number of channels is larger than two; i.e., initialize the
+  // stream in stereo even if the preferred configuration is multi-channel.
+  if (params.channels() <= 2) {
+    format->nChannels = rtc::dchecked_cast<WORD>(params.channels());
+  } else {
+    // TODO(henrika): ensure that this approach works on different multi-channel
+    // devices. Verified on:
+    // - Corsair VOID PRO Surround USB Adapter (supports 7.1)
+    RTC_LOG(LS_WARNING)
+        << "Using channel upmixing in WASAPI audio engine (2 => "
+        << params.channels() << ")";
+    format->nChannels = 2;
+  }
   format->nSamplesPerSec = params.sample_rate();
   format->wBitsPerSample = rtc::dchecked_cast<WORD>(params.bits_per_sample());
   format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
@@ -371,10 +385,8 @@ bool CoreAudioBase::Init() {
   // Add the parts which are unique for the WAVE_FORMAT_EXTENSIBLE structure.
   format_.Samples.wValidBitsPerSample =
       rtc::dchecked_cast<WORD>(params.bits_per_sample());
-  // TODO(henrika): improve (common for input and output?)
-  format_.dwChannelMask = params.channels() == 1
-                              ? SPEAKER_FRONT_CENTER
-                              : SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
+  format_.dwChannelMask =
+      format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO;
   format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
   RTC_DLOG(INFO) << core_audio_utility::WaveFormatExToString(&format_);
 
@@ -481,10 +493,8 @@ bool CoreAudioBase::Init() {
     return false;
   }
 
-  // Store valid COM interface.
-  if (audio_client) {
-    audio_client_ = audio_client;
-  }
+  // Store valid COM interfaces.
+  audio_client_ = audio_client;
   audio_session_control_ = audio_session_control;
 
   return true;
diff --git a/modules/audio_device/win/core_audio_utility_win.cc b/modules/audio_device/win/core_audio_utility_win.cc
index 040672a3ce..a19ab77dc2 100644
--- a/modules/audio_device/win/core_audio_utility_win.cc
+++ b/modules/audio_device/win/core_audio_utility_win.cc
@@ -37,6 +37,149 @@ namespace {
 
 using core_audio_utility::ErrorToString;
 
+// Returns a human-readable list of the speaker positions set in
+// |channel_mask|, joined by " | " and followed by the channel count in
+// parentheses, e.g. "FRONT_LEFT | FRONT_RIGHT (2)".
+// The number of channels equals the number of nonzero flag bits in
+// |channel_mask|, and the channels within each block of audio data follow the
+// same relative ordering as the flag tests below. For example, if
+// |channel_mask| is 0x00000033, the format defines four audio channels that
+// are assigned for playback to the front-left, front-right, back-left, and
+// back-right speakers, respectively. The channel data should be interleaved
+// in that order within each block.
+std::string ChannelMaskToString(DWORD channel_mask) {
+  std::string ss;
+  int n = 0;  // Number of listed speaker flags found in |channel_mask|.
+  if (channel_mask & SPEAKER_FRONT_LEFT) {
+    ss += "FRONT_LEFT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_FRONT_RIGHT) {
+    ss += "FRONT_RIGHT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_FRONT_CENTER) {
+    ss += "FRONT_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_LOW_FREQUENCY) {
+    ss += "LOW_FREQUENCY | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_BACK_LEFT) {
+    ss += "BACK_LEFT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_BACK_RIGHT) {
+    ss += "BACK_RIGHT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_FRONT_LEFT_OF_CENTER) {
+    ss += "FRONT_LEFT_OF_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_FRONT_RIGHT_OF_CENTER) {
+    ss += "FRONT_RIGHT_OF_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_BACK_CENTER) {
+    ss += "BACK_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_SIDE_LEFT) {
+    ss += "SIDE_LEFT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_SIDE_RIGHT) {
+    ss += "SIDE_RIGHT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_CENTER) {
+    ss += "TOP_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_FRONT_LEFT) {
+    ss += "TOP_FRONT_LEFT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_FRONT_CENTER) {
+    ss += "TOP_FRONT_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_FRONT_RIGHT) {
+    ss += "TOP_FRONT_RIGHT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_BACK_LEFT) {
+    ss += "TOP_BACK_LEFT | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_BACK_CENTER) {
+    ss += "TOP_BACK_CENTER | ";
+    ++n;
+  }
+  if (channel_mask & SPEAKER_TOP_BACK_RIGHT) {
+    ss += "TOP_BACK_RIGHT | ";
+    ++n;
+  }
+
+  if (!ss.empty()) {
+    // Delete last appended " | " substring.
+    ss.erase(ss.end() - 3, ss.end());
+  }
+  ss += " (";
+  ss += std::to_string(n);
+  ss += ")";
+  return ss;
+}
+
+// Maps |channel_mask| to the name of the DirectSound speaker configuration
+// (KSAUDIO_SPEAKER_* values from ksmedia.h) it equals exactly, for example
+// KSAUDIO_SPEAKER_STEREO = (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT).
+const char* DirectSoundConfigToString(DWORD channel_mask) {
+  switch (channel_mask) {
+    case KSAUDIO_SPEAKER_DIRECTOUT:
+      return "KSAUDIO_DIRECTOUT";
+    case KSAUDIO_SPEAKER_MONO:
+      // Front center (C).
+      return "KSAUDIO_MONO";
+    case KSAUDIO_SPEAKER_1POINT1:
+      return "KSAUDIO_1POINT1";
+    case KSAUDIO_SPEAKER_STEREO:
+      // Front left (L), front right (R).
+      return "KSAUDIO_STEREO";
+    case KSAUDIO_SPEAKER_2POINT1:
+      return "KSAUDIO_2POINT1";
+    case KSAUDIO_SPEAKER_3POINT0:
+      return "KSAUDIO_3POINT0";
+    case KSAUDIO_SPEAKER_3POINT1:
+      return "KSAUDIO_3POINT1";
+    case KSAUDIO_SPEAKER_QUAD:
+      // L, R, back left (Lb), back right (Rb).
+      return "KSAUDIO_QUAD";
+    case KSAUDIO_SPEAKER_SURROUND:
+      // L, R, front center (C), back center (Cb).
+      return "KSAUDIO_SURROUND";
+    case KSAUDIO_SPEAKER_5POINT0:
+      return "KSAUDIO_5POINT0";
+    case KSAUDIO_SPEAKER_5POINT1:
+      return "KSAUDIO_5POINT1";
+    case KSAUDIO_SPEAKER_7POINT0:
+      return "KSAUDIO_7POINT0";
+    case KSAUDIO_SPEAKER_7POINT1:
+      // L, R, C, Lb, Rb, front left-of-center, front right-of-center, LFE.
+      return "KSAUDIO_7POINT1";
+    case KSAUDIO_SPEAKER_5POINT1_SURROUND:
+      // L, R, C, side left (Ls), side right (Rs), LFE.
+      return "KSAUDIO_5POINT1_SURROUND";
+    case KSAUDIO_SPEAKER_7POINT1_SURROUND:
+      // L, R, C, Lb, Rb, Ls, Rs, LFE.
+      return "KSAUDIO_7POINT1_SURROUND";
+    default:
+      // |channel_mask| does not equal any of the configurations listed above.
+      return "KSAUDIO_INVALID";
+  }
+}
+
 bool LoadAudiosesDll() {
   static const wchar_t* const kAudiosesDLL =
       L"%WINDIR%\\system32\\audioses.dll";
@@ -394,7 +537,6 @@ HRESULT GetPreferredAudioParametersInternal(IAudioClient* client,
   // const size_t bits_per_sample = AudioParameters::kBitsPerSample;
   // TODO(henrika): improve channel layout support.
   const size_t channels = mix_format.Format.nChannels;
-  RTC_DCHECK_LE(channels, 2);
 
   // Use the native device period to derive the smallest possible buffer size
   // in shared mode.
@@ -669,8 +811,17 @@ HRESULT SetClientProperties(IAudioClient2* client) {
   // TODO(henrika): pros and cons compared with AUDCLNT_STREAMOPTIONS_NONE?
   props.Options |= AUDCLNT_STREAMOPTIONS_NONE;
   // Requires System.Devices.AudioDevice.RawProcessingSupported.
+  // The application can choose to *always ignore* the OEM AEC/AGC by setting
+  // the AUDCLNT_STREAMOPTIONS_RAW flag in the call to SetClientProperties.
+  // This flag will preserve the user experience aspect of Communications
+  // streams, but will not insert any OEM provided communications specific
+  // processing in the audio signal path.
   // props.Options |= AUDCLNT_STREAMOPTIONS_RAW;
+
   // If it is important to avoid resampling in the audio engine, set this flag.
+  // AUDCLNT_STREAMOPTIONS_MATCH_FORMAT (or anything in IAudioClient3) is not
+  // an appropriate interface to use for communications scenarios.
+  // This interface is mainly meant for pro audio scenarios.
   // props.Options |= AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
   RTC_DLOG(INFO) << "options: 0x" << rtc::ToHex(props.Options);
   error = client->SetClientProperties(&props);
@@ -1001,7 +1152,7 @@ HRESULT SharedModeInitializeLowLatency(IAudioClient3* client,
   }
 
   // Define stream flags.
-  DWORD stream_flags = 0;
+  DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST;
   bool use_event =
       (event_handle != nullptr && event_handle != INVALID_HANDLE_VALUE);
   if (use_event) {
@@ -1215,6 +1366,10 @@ std::string WaveFormatExToString(const WAVEFORMATEXTENSIBLE* format) {
   } else {
     ss << ", SubFormat: NOT_SUPPORTED";
   }
+  ss.AppendFormat("\nChannel configuration: %s",
+                  ChannelMaskToString(format->dwChannelMask).c_str());
+  ss.AppendFormat("\nDirectSound configuration : %s",
+                  DirectSoundConfigToString(format->dwChannelMask));
   return ss.str();
 }