diff --git a/data/audio_processing/output_data_fixed.pb b/data/audio_processing/output_data_fixed.pb
index 6e36d5594a..eb525c32a6 100644
Binary files a/data/audio_processing/output_data_fixed.pb and b/data/audio_processing/output_data_fixed.pb differ
diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index 0fd2fe8cd8..79619e7fda 100644
Binary files a/data/audio_processing/output_data_float.pb and b/data/audio_processing/output_data_float.pb differ
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 9947060141..cd8d93aa70 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -51,11 +51,18 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
   return -1;
 }
 
-template <typename T>
-void StereoToMono(const T* left, const T* right, T* out,
+void StereoToMono(const float* left, const float* right, float* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; ++i)
+  for (int i = 0; i < samples_per_channel; ++i) {
     out[i] = (left[i] + right[i]) / 2;
+  }
+}
+
+void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
+                  int samples_per_channel) {
+  for (int i = 0; i < samples_per_channel; ++i) {
+    out[i] = (left[i] + right[i]) >> 1;
+  }
 }
 
 }  // namespace
@@ -107,7 +114,13 @@ class IFChannelBuffer {
   void RefreshI() {
     if (!ivalid_) {
       assert(fvalid_);
-      FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
+      const float* const float_data = fbuf_.data();
+      int16_t* const int_data = ibuf_.data();
+      const int length = ibuf_.length();
+      for (int i = 0; i < length; ++i)
+        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
+                                     float_data[i],
+                                     std::numeric_limits<int16_t>::min());
       ivalid_ = true;
     }
   }
@@ -217,8 +230,8 @@ void AudioBuffer::CopyFrom(const float* const* data,
 
   // Convert to int16.
   for (int i = 0; i < num_proc_channels_; ++i) {
-    FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
-                    channels_->fbuf()->channel(i));
+    FloatToS16(data_ptr[i], proc_samples_per_channel_,
+               channels_->ibuf()->channel(i));
   }
 }
 
@@ -235,9 +248,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
     data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    FloatS16ToFloat(channels_->fbuf()->channel(i),
-                    proc_samples_per_channel_,
-                    data_ptr[i]);
+    S16ToFloat(channels_->ibuf()->channel(i),
+               proc_samples_per_channel_,
+               data_ptr[i]);
   }
 
   // Resample.
@@ -436,7 +449,12 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
     // Downmix directly; no explicit deinterleaving needed.
     int16_t* downmixed = channels_->ibuf()->channel(0);
     for (int i = 0; i < input_samples_per_channel_; ++i) {
-      downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
+      // HACK(ajm): The downmixing in the int16_t path is in practice never
+      // called from production code. We do this weird scaling to and from float
+      // to satisfy tests checking for bit-exactness with the float path.
+      float downmix_float = (S16ToFloat(frame->data_[i * 2]) +
+                             S16ToFloat(frame->data_[i * 2 + 1])) / 2;
+      downmixed[i] = FloatToS16(downmix_float);
     }
   } else {
     assert(num_proc_channels_ == num_input_channels_);
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index af31a63674..282da94782 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -1650,7 +1650,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }
 
-TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
+TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
   audioproc::OutputData ref_data;
   OpenFileAndReadMessage(ref_filename_, &ref_data);
 
@@ -1679,8 +1679,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
     Init(fapm.get());
 
     ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
-    ChannelBuffer<int16_t> output_int16(samples_per_channel,
-                                        num_input_channels);
+    scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
 
     int analog_level = 127;
     while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
@@ -1702,9 +1701,7 @@
       EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
 
       EXPECT_NOERR(apm_->ProcessStream(frame_));
-      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
-                   output_int16.channels());
-
+      // TODO(ajm): Update to support different output rates.
       EXPECT_NOERR(fapm->ProcessStream(
           float_cb_->channels(),
          samples_per_channel,
@@ -1714,34 +1711,24 @@
           LayoutFromChannels(num_output_channels),
           float_cb_->channels()));
 
+      // Convert to interleaved int16.
       FloatToS16(float_cb_->data(), output_length, output_cb.data());
-      for (int j = 0; j < num_output_channels; ++j) {
-        float variance = 0;
-        float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j),
-                               samples_per_channel, &variance);
-        #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-        // There are a few chunks in the fixed-point profile that give low SNR.
-        // Listening confirmed the difference is acceptable.
-        const float kVarianceThreshold = 150;
-        const float kSNRThreshold = 10;
-        #else
-        const float kVarianceThreshold = 20;
-        const float kSNRThreshold = 20;
-        #endif
-        // Skip frames with low energy.
-        if (sqrt(variance) > kVarianceThreshold) {
-          EXPECT_LT(kSNRThreshold, snr);
-        }
-      }
+      Interleave(output_cb.channels(),
+                 samples_per_channel,
+                 num_output_channels,
+                 output_int16.get());
+      // Verify float and int16 paths produce identical output.
+      EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length));
 
       analog_level = fapm->gain_control()->stream_analog_level();
       EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
                 fapm->gain_control()->stream_analog_level());
       EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
                 fapm->echo_cancellation()->stream_has_echo());
-      EXPECT_NEAR(apm_->noise_suppression()->speech_probability(),
-                  fapm->noise_suppression()->speech_probability(),
-                  0.0005);
+      EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
+                fapm->voice_detection()->stream_has_voice());
+      EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
+                fapm->noise_suppression()->speech_probability());
 
       // Reset in case of downmixing.
       frame_->num_channels_ = test->num_input_channels();
diff --git a/webrtc/modules/audio_processing/test/test_utils.h b/webrtc/modules/audio_processing/test/test_utils.h
index a99f3427de..61edd8f35b 100644
--- a/webrtc/modules/audio_processing/test/test_utils.h
+++ b/webrtc/modules/audio_processing/test/test_utils.h
@@ -8,7 +8,6 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-#include <math.h>
 #include <stdio.h>
 
 #include "webrtc/audio_processing/debug.pb.h"
@@ -154,26 +153,4 @@ static inline bool ReadMessageFromFile(FILE* file,
   return msg->ParseFromArray(bytes.get(), size);
 }
 
-template <typename T>
-float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
-  float mse = 0;
-  float mean = 0;
-  *variance = 0;
-  for (int i = 0; i < length; ++i) {
-    T error = ref[i] - test[i];
-    mse += error * error;
-    *variance += ref[i] * ref[i];
-    mean += ref[i];
-  }
-  mse /= length;
-  *variance /= length;
-  mean /= length;
-  *variance -= mean * mean;
-
-  float snr = 100;  // We assign 100 dB to the zero-error case.
-  if (mse > 0)
-    snr = 10 * log10(*variance / mse);
-  return snr;
-}
-
 }  // namespace webrtc
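
Note on the IFChannelBuffer::RefreshI() change above: each float sample (kept scaled to the int16 range, as the FloatS16 naming in the removed helper suggests) is clamped to the representable int16_t range before the truncating cast. A minimal standalone sketch of that saturating conversion, using std::numeric_limits directly instead of the WEBRTC_SPL_SAT macro; the function name and demo values are illustrative, not from the patch:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

// Clamp a float sample that is already scaled to the int16 range and let the
// cast truncate, mirroring what the RefreshI() loop does via WEBRTC_SPL_SAT.
int16_t SaturatingFloatS16ToS16(float v) {
  const float kMax = std::numeric_limits<int16_t>::max();  // 32767
  const float kMin = std::numeric_limits<int16_t>::min();  // -32768
  return static_cast<int16_t>(std::min(kMax, std::max(kMin, v)));
}

int main() {
  const float samples[] = {123.9f, -0.4f, 40000.f, -40000.f};
  for (float s : samples)
    std::printf("%10.1f -> %6d\n", s, SaturatingFloatS16ToS16(s));
  return 0;
}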
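On the HACK(ajm) comment in DeinterleaveFrom(): averaging two int16 samples with >> 1 floors toward negative infinity, while the float path scales the samples down, averages them, and converts back with its own rounding and clamping rules, so the two downmixes need not agree in the last bit. The sketch below demonstrates the idea with hypothetical S16ToFloatApprox/FloatToS16Approx stand-ins that scale symmetrically by 1/32768 and round to nearest; the real audio_util.h helpers use different (asymmetric) scaling, so this is an assumption-laden illustration, not the WebRTC implementation:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the conversion helpers referenced in the patch.
float S16ToFloatApprox(int16_t v) { return v / 32768.f; }

int16_t FloatToS16Approx(float v) {
  float scaled = v * 32768.f;
  if (scaled > 32767.f) scaled = 32767.f;
  if (scaled < -32768.f) scaled = -32768.f;
  return static_cast<int16_t>(std::lround(scaled));
}

int main() {
  const int16_t left = 3;
  const int16_t right = -2;

  // Integer path: (3 + -2) >> 1 floors 0.5 down to 0.
  const int16_t shifted = static_cast<int16_t>((left + right) >> 1);

  // Float path: the average is exactly 0.5/32768, which rounds back to 1.
  const float avg = (S16ToFloatApprox(left) + S16ToFloatApprox(right)) / 2;
  const int16_t roundtrip = FloatToS16Approx(avg);

  std::printf("shift: %d  float round-trip: %d\n", shifted, roundtrip);
  return 0;
}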
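The rewritten test compares the two APIs sample-for-sample instead of through an SNR threshold: the float output is converted to int16, interleaved into the AudioFrame channel layout, and compared against frame_->data_. A small sketch of that comparison pattern; InterleaveChannels and BitExact are illustrative stand-ins, not the webrtc Interleave() helper used in the patch:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Pack per-channel buffers into the interleaved layout used by AudioFrame so
// the result can be compared directly against the int16 path's output.
void InterleaveChannels(const int16_t* const* deinterleaved,
                        int samples_per_channel,
                        int num_channels,
                        int16_t* interleaved) {
  for (int ch = 0; ch < num_channels; ++ch)
    for (int i = 0; i < samples_per_channel; ++i)
      interleaved[i * num_channels + ch] = deinterleaved[ch][i];
}

// Bit-exact comparison; note the byte count is num_samples * sizeof(int16_t).
bool BitExact(const int16_t* a, const int16_t* b, int num_samples) {
  return std::memcmp(a, b, num_samples * sizeof(int16_t)) == 0;
}

int main() {
  const int16_t ch0[] = {1, 2, 3};
  const int16_t ch1[] = {4, 5, 6};
  const int16_t* channels[] = {ch0, ch1};
  int16_t interleaved[6];
  InterleaveChannels(channels, 3, 2, interleaved);

  const int16_t expected[] = {1, 4, 2, 5, 3, 6};
  std::printf("bit-exact: %s\n", BitExact(interleaved, expected, 6) ? "yes" : "no");
  return 0;
}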