From 30be827e6a2bfae76c445e62d0853f83d238814a Mon Sep 17 00:00:00 2001
From: "andrew@webrtc.org" <andrew@webrtc.org>
Date: Wed, 24 Sep 2014 20:06:23 +0000
Subject: [PATCH] Enable render downmixing to mono in AudioProcessing.

In practice, we have been doing this since time immemorial, but have
relied on the user to do the downmixing (first voice engine then
Chromium). It's more logical for this burden to fall on AudioProcessing,
however, who can be expected to know that this is a reasonable approach
for AEC. Permitting two render channels results in running two AECs
serially.

Critically, in my recent change to have Chromium adopt the float
interface:
https://codereview.chromium.org/420603004
I removed the downmixing by Chromium, forgetting that we hadn't yet
enabled this feature in AudioProcessing. This corrects that oversight.

The change in paths hit by production users is very minor. As commented
it required adding downmixing to the int16_t path to satisfy
bit-exactness tests.

For reference, find the ApmTest.Process errors here:
https://paste.googleplex.com/6372007910309888

BUG=webrtc:3853
TESTED=listened to the files output from the Process test, and verified
that they sound as expected: higher echo while the AEC is adapting, but
afterwards very close.

R=aluebs@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/31459004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7292 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 data/audio_processing/output_data_fixed.pb    | Bin 188 -> 188 bytes
 data/audio_processing/output_data_float.pb    | Bin 1386 -> 1404 bytes
 .../modules/audio_processing/audio_buffer.cc  |  31 +++++++++++++-----
 .../audio_processing/audio_processing_impl.cc |   7 ++--
 .../test/audio_processing_unittest.cc         |   5 +--
 5 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/data/audio_processing/output_data_fixed.pb b/data/audio_processing/output_data_fixed.pb
index c5ba4391044f5fe16940d57d21c53363e4d30eb6..eb525c32a615eba13566863e13c6f72a3ca5f1d9 100644
GIT binary patch
delta 67
zcmdnPxQB5<9`8vJ2L_LWECCE$q8v;U<;Ap_1ehe46b{%K)LU$jaA5G*0+j;`H`E%`
PTg;PmVDNa&62JfeBYqEj

delta 85
zcmdnPxQB5<p5Igv2L_KDECCE$q8v;Dj1r6r4YdaK76(Kf7(9N#gqb9m6b{%K)LZ-(
WcVO^1hb#<M{YJup!Q%o;00RJj5)?%M

diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index 7d3f9cddff640e1d83931f3d114071018a20cdb9..28ef817b814c965230fe3ac44fe8966e3ee65eab 100644
GIT binary patch
delta 147
zcmaFG^@oeGVk6^urimgo9F6Hh_FS19OcQ4$bEsYsvFFO-V4BR#m?HBmOvIk6myv@>
zfJuT$;eee%2cyL%0f&!F9<!Mz>#{gaKFX*k$B-disv9n3&y@gG*HCNF#%S?D(BUPM
o$NtHJOdit7&7$^Pi7@G3j0WwD7CVGM^1CLNFnO>r92K<(0H|Iky#N3J

delta 441
zcmeyv^@@uzZ6o7(CXQV^4j-62HZV=LXEB}3%goEjF)`k-KH(z2Jy!w;lK`Uxqe4Ti
zK_{cdWIl)IOdczl0vMvKxHvdu1Y{&+6l8>?IDY+y0!9IL2~GtBSBis!K|oYOLV-aE
z#Nw3TQea48;ouNp$dE40-z{p-l?b!p7o$NJqs39M6^obxlBi=xO_iuUS1%(6lK_(h
zlfnTzgAPWEJNzL3E(iJ7iIIKsF-F}$kaOy}1Rx#;scfh<Xk)avAPDmCa+rtZ1>_~<
z7377aIDSC=`yb+A1Q+aK2??1=tfGFZi=qAl8~BUSpq<fTxe&-ico33iB1;adm^}cJ
C`-@@#

diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index fb2c200e5d..2bbd7710eb 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -406,19 +406,32 @@ int AudioBuffer::samples_per_keyboard_channel() const {
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
-  assert(num_proc_channels_ == num_input_channels_);
-  assert(frame->num_channels_ == num_proc_channels_);
+  assert(frame->num_channels_ == num_input_channels_);
   assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
   InitForNewData();
   activity_ = frame->vad_activity_;
 
-  int16_t* interleaved = frame->data_;
-  for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->ibuf()->channel(i);
-    int interleaved_idx = i;
-    for (int j = 0; j < proc_samples_per_channel_; j++) {
-      deinterleaved[j] = interleaved[interleaved_idx];
-      interleaved_idx += num_proc_channels_;
+  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
+    // Downmix directly; no explicit deinterleaving needed.
+    int16_t* downmixed = channels_->ibuf()->channel(0);
+    for (int i = 0; i < input_samples_per_channel_; ++i) {
+      // HACK(ajm): The downmixing in the int16_t path is in practice never
+      // called from production code. We do this weird scaling to and from float
+      // to satisfy tests checking for bit-exactness with the float path.
+      float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
+                             ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
+      downmixed[i] = ScaleAndRoundToInt16(downmix_float);
+    }
+  } else {
+    assert(num_proc_channels_ == num_input_channels_);
+    int16_t* interleaved = frame->data_;
+    for (int i = 0; i < num_proc_channels_; ++i) {
+      int16_t* deinterleaved = channels_->ibuf()->channel(i);
+      int interleaved_idx = i;
+      for (int j = 0; j < proc_samples_per_channel_; ++j) {
+        deinterleaved[j] = interleaved[interleaved_idx];
+        interleaved_idx += num_proc_channels_;
+      }
     }
   }
 }
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 49a7d4f56c..d91cbd2fd3 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -257,10 +257,9 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
     }
   }
 
-  // TODO(ajm): Enable this.
-  // Always downmix the reverse stream to mono for analysis.
-  //rev_proc_format_.set(rev_proc_rate, 1);
-  rev_proc_format_.set(rev_proc_rate, rev_in_format_.num_channels());
+  // Always downmix the reverse stream to mono for analysis. This has been
+  // demonstrated to work well for AEC in most practical scenarios.
+  rev_proc_format_.set(rev_proc_rate, 1);
 
   if (fwd_proc_format_.rate() == kSampleRate32kHz) {
     split_rate_ = kSampleRate16kHz;
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 3a35fe5b66..a0fb303b52 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -751,7 +751,8 @@ TEST_F(ApmTest, Channels) {
   for (int i = 1; i < 3; i++) {
     TestChangingChannels(i, kNoErr);
     EXPECT_EQ(i, apm_->num_input_channels());
-    EXPECT_EQ(i, apm_->num_reverse_channels());
+    // We always force the number of reverse channels used for processing to 1.
+    EXPECT_EQ(1, apm_->num_reverse_channels());
   }
 }
 
@@ -1671,7 +1672,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
     const int num_output_channels = test->num_output_channels();
     const int samples_per_channel = test->sample_rate() *
         AudioProcessing::kChunkSizeMs / 1000;
-    const int output_length =  samples_per_channel * num_output_channels;
+    const int output_length = samples_per_channel * num_output_channels;
 
     Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
          num_input_channels, num_output_channels, num_render_channels, true);