Adding 120 ms frame length support in NetEq.

BUG=webrtc:1015

Review-Url: https://codereview.webrtc.org/1901633002
Cr-Commit-Position: refs/heads/master@{#12592}
This commit is contained in:
minyue
2016-05-02 04:46:11 -07:00
committed by Commit bot
parent e4246b61b6
commit 5bd3397e53
6 changed files with 288 additions and 20 deletions

View File

@ -39,6 +39,8 @@ Merge::Merge(int fs_hz,
assert(num_channels_ > 0); assert(num_channels_ > 0);
} }
Merge::~Merge() = default;
size_t Merge::Process(int16_t* input, size_t input_length, size_t Merge::Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array, int16_t* external_mute_factor_array,
AudioMultiVector* output) { AudioMultiVector* output) {
@ -91,9 +93,8 @@ size_t Merge::Process(int16_t* input, size_t input_length,
old_length, input_length_per_channel, expand_period); old_length, input_length_per_channel, expand_period);
} }
static const int kTempDataSize = 3600; temp_data_.resize(input_length_per_channel + best_correlation_index);
int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. int16_t* decoded_output = temp_data_.data() + best_correlation_index;
int16_t* decoded_output = temp_data + best_correlation_index;
// Mute the new decoded data if needed (and unmute it linearly). // Mute the new decoded data if needed (and unmute it linearly).
// This is the overlapping part of expanded_signal. // This is the overlapping part of expanded_signal.
@ -127,7 +128,7 @@ size_t Merge::Process(int16_t* input, size_t input_length,
int16_t increment = int16_t increment =
static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14. static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.
int16_t mute_factor = 16384 - increment; int16_t mute_factor = 16384 - increment;
memmove(temp_data, expanded_channel, memmove(temp_data_.data(), expanded_channel,
sizeof(int16_t) * best_correlation_index); sizeof(int16_t) * best_correlation_index);
DspHelper::CrossFade(&expanded_channel[best_correlation_index], DspHelper::CrossFade(&expanded_channel[best_correlation_index],
input_channel, interpolation_length, input_channel, interpolation_length,
@ -140,8 +141,8 @@ size_t Merge::Process(int16_t* input, size_t input_length,
} else { } else {
assert(output->Size() == output_length); assert(output->Size() == output_length);
} }
memcpy(&(*output)[channel][0], temp_data, memcpy(&(*output)[channel][0], temp_data_.data(),
sizeof(temp_data[0]) * output_length); sizeof(temp_data_[0]) * output_length);
} }
// Copy back the first part of the data to |sync_buffer_| and remove it from // Copy back the first part of the data to |sync_buffer_| and remove it from
@ -208,22 +209,20 @@ int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
std::min(static_cast<size_t>(64 * fs_mult_), input_length); std::min(static_cast<size_t>(64 * fs_mult_), input_length);
const int16_t expanded_max = const int16_t expanded_max =
WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); int32_t factor = (expanded_max * expanded_max) /
(std::numeric_limits<int32_t>::max() /
// Calculate energy of expanded signal. static_cast<int32_t>(mod_input_length));
// |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz. const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);
int expanded_shift = 6 + log_fs_mult
- WebRtcSpl_NormW32(expanded_max * expanded_max);
expanded_shift = std::max(expanded_shift, 0);
int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal, int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,
expanded_signal, expanded_signal,
mod_input_length, mod_input_length,
expanded_shift); expanded_shift);
// Calculate energy of input signal. // Calculate energy of input signal.
int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max); const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
input_shift = std::max(input_shift, 0); factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /
static_cast<int32_t>(mod_input_length));
const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input, int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,
mod_input_length, mod_input_length,
input_shift); input_shift);

View File

@ -37,7 +37,7 @@ class Merge {
size_t num_channels, size_t num_channels,
Expand* expand, Expand* expand,
SyncBuffer* sync_buffer); SyncBuffer* sync_buffer);
virtual ~Merge() {} virtual ~Merge();
// The main method to produce the audio data. The decoded data is supplied in // The main method to produce the audio data. The decoded data is supplied in
// |input|, having |input_length| samples in total for all channels // |input|, having |input_length| samples in total for all channels
@ -93,6 +93,7 @@ class Merge {
int16_t expanded_downsampled_[kExpandDownsampLength]; int16_t expanded_downsampled_[kExpandDownsampLength];
int16_t input_downsampled_[kInputDownsampLength]; int16_t input_downsampled_[kInputDownsampLength];
AudioMultiVector expanded_; AudioMultiVector expanded_;
std::vector<int16_t> temp_data_;
RTC_DISALLOW_COPY_AND_ASSIGN(Merge); RTC_DISALLOW_COPY_AND_ASSIGN(Merge);
}; };

View File

@ -500,6 +500,11 @@ const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
return sync_buffer_.get(); return sync_buffer_.get();
} }
// Test-only accessor: acquires |crit_sect_| and returns the current value of
// |last_operation_|.
Operations NetEqImpl::last_operation_for_test() const {
  rtc::CritScope cs(&crit_sect_);
  const Operations most_recent_operation = last_operation_;
  return most_recent_operation;
}
// Methods below this line are private. // Methods below this line are private.
int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header, int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
@ -905,6 +910,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame) {
return kInvalidOperation; return kInvalidOperation;
} }
} // End of switch. } // End of switch.
last_operation_ = operation;
if (return_value < 0) { if (return_value < 0) {
return return_value; return return_value;
} }

View File

@ -204,10 +204,11 @@ class NetEqImpl : public webrtc::NetEq {
// This accessor method is only intended for testing purposes. // This accessor method is only intended for testing purposes.
const SyncBuffer* sync_buffer_for_test() const; const SyncBuffer* sync_buffer_for_test() const;
Operations last_operation_for_test() const;
protected: protected:
static const int kOutputSizeMs = 10; static const int kOutputSizeMs = 10;
static const size_t kMaxFrameSize = 2880; // 60 ms @ 48 kHz. static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize. // TODO(hlundin): Provide a better value for kSyncBufferSize.
static const size_t kSyncBufferSize = 2 * kMaxFrameSize; static const size_t kSyncBufferSize = 2 * kMaxFrameSize;
@ -383,6 +384,7 @@ class NetEqImpl : public webrtc::NetEq {
size_t output_size_samples_ GUARDED_BY(crit_sect_); size_t output_size_samples_ GUARDED_BY(crit_sect_);
size_t decoder_frame_length_ GUARDED_BY(crit_sect_); size_t decoder_frame_length_ GUARDED_BY(crit_sect_);
Modes last_mode_ GUARDED_BY(crit_sect_); Modes last_mode_ GUARDED_BY(crit_sect_);
Operations last_operation_ GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_); std::unique_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_);
size_t decoded_buffer_length_ GUARDED_BY(crit_sect_); size_t decoded_buffer_length_ GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> decoded_buffer_ GUARDED_BY(crit_sect_); std::unique_ptr<int16_t[]> decoded_buffer_ GUARDED_BY(crit_sect_);

View File

@ -763,7 +763,7 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
TEST_F(NetEqImplTest, UnsupportedDecoder) { TEST_F(NetEqImplTest, UnsupportedDecoder) {
UseNoMocks(); UseNoMocks();
CreateInstance(); CreateInstance();
static const size_t kNetEqMaxFrameSize = 2880; // 60 ms @ 48 kHz. static const size_t kNetEqMaxFrameSize = 5760; // 120 ms @ 48 kHz.
static const size_t kChannels = 2; static const size_t kChannels = 2;
const uint8_t kPayloadType = 17; // Just an arbitrary number. const uint8_t kPayloadType = 17; // Just an arbitrary number.
@ -773,7 +773,7 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
const size_t kPayloadLengthSamples = const size_t kPayloadLengthSamples =
static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms. static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms.
const size_t kPayloadLengthBytes = 1; const size_t kPayloadLengthBytes = 1;
uint8_t payload[kPayloadLengthBytes]= {0}; uint8_t payload[kPayloadLengthBytes] = {0};
int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0}; int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0};
WebRtcRTPHeader rtp_header; WebRtcRTPHeader rtp_header;
rtp_header.header.payloadType = kPayloadType; rtp_header.header.payloadType = kPayloadType;
@ -1189,4 +1189,214 @@ TEST_F(NetEqImplTest, TickTimerIncrement) {
EXPECT_EQ(1u, tick_timer_->ticks()); EXPECT_EQ(1u, tick_timer_->ticks());
} }
// Fake two-channel decoder used by the 120 ms tests. Every DecodeInternal()
// call ignores the encoded payload and writes 120 ms worth of values (counted
// over all channels) taken from a running counter, reporting the speech type
// that was supplied at construction.
class Decoder120ms : public AudioDecoder {
 public:
  // |speech_type| is handed back through every DecodeInternal() call.
  // Declared explicit so that a bare SpeechType value can never be implicitly
  // converted into a decoder object.
  explicit Decoder120ms(SpeechType speech_type)
      : next_value_(1),
        speech_type_(speech_type) {}

  // Fills |decoded| with sample_rate_hz / 1000 * 120 * Channels() consecutive
  // counter values, stores the configured speech type in |*speech_type| and
  // returns the number of values written. |encoded| and |encoded_len| are not
  // inspected. |sample_rate_hz| must be a multiple of 1000 (CheckedDivExact).
  int DecodeInternal(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override {
    const size_t decoded_len =
        rtc::CheckedDivExact(sample_rate_hz, 1000) * 120 * Channels();
    for (size_t i = 0; i < decoded_len; ++i) {
      // NOTE(review): |next_value_| is only 16 bits wide, so the counter does
      // not keep increasing forever between calls to Reset().
      decoded[i] = next_value_++;
    }
    *speech_type = speech_type_;
    // The interface returns int; make the size_t -> int conversion explicit.
    return static_cast<int>(decoded_len);
  }

  // Restarts the sample counter at its initial value.
  void Reset() override { next_value_ = 1; }

  // This fake always reports two channels.
  size_t Channels() const override { return 2; }

 private:
  int16_t next_value_;       // Next value to be written to the output.
  SpeechType speech_type_;   // Speech type reported by DecodeInternal().
};
class NetEqImplTest120ms : public NetEqImplTest {
protected:
NetEqImplTest120ms() : NetEqImplTest() {}
virtual ~NetEqImplTest120ms() {}
void CreateInstanceNoMocks() {
UseNoMocks();
CreateInstance();
}
void CreateInstanceWithDelayManagerMock() {
UseNoMocks();
use_mock_delay_manager_ = true;
CreateInstance();
}
uint32_t timestamp_diff_between_packets() const {
return rtc::CheckedDivExact(kSamplingFreq_, 1000u) * 120;
}
uint32_t first_timestamp() const { return 10u; }
void GetFirstPacket() {
for (int i = 0; i < 12; i++) {
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
}
}
void InsertPacket(uint32_t timestamp) {
WebRtcRTPHeader rtp_header;
rtp_header.header.payloadType = kPayloadType;
rtp_header.header.sequenceNumber = sequence_number_;
rtp_header.header.timestamp = timestamp;
rtp_header.header.ssrc = 15;
const size_t kPayloadLengthBytes = 1; // This can be arbitrary.
uint8_t payload[kPayloadLengthBytes] = {0};
EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, 10));
sequence_number_++;
}
void Register120msCodec(AudioDecoder::SpeechType speech_type) {
decoder_.reset(new Decoder120ms(speech_type));
ASSERT_EQ(2u, decoder_->Channels());
EXPECT_EQ(NetEq::kOK, neteq_->RegisterExternalDecoder(
decoder_.get(), NetEqDecoder::kDecoderOpus_2ch,
"120ms codec", kPayloadType, kSamplingFreq_));
}
std::unique_ptr<Decoder120ms> decoder_;
AudioFrame output_;
const uint32_t kPayloadType = 17;
const uint32_t kSamplingFreq_ = 48000;
uint16_t sequence_number_ = 1;
};
// With playout mode kPlayoutFax: insert a single 120 ms packet, call
// GetAudio() 12 times via GetFirstPacket(), and verify that one further
// GetAudio() call succeeds and is recorded as a kAudioRepetition operation.
TEST_F(NetEqImplTest120ms, AudioRepetition) {
config_.playout_mode = kPlayoutFax;
CreateInstanceNoMocks();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// No further packet has been inserted at this point.
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kAudioRepetition, neteq_->last_operation_for_test());
}
// With playout mode kPlayoutOff: insert a single 120 ms packet, call
// GetAudio() 12 times via GetFirstPacket(), and verify that one further
// GetAudio() call succeeds and is recorded as a kAlternativePlc operation.
TEST_F(NetEqImplTest120ms, AlternativePlc) {
config_.playout_mode = kPlayoutOff;
CreateInstanceNoMocks();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// No further packet has been inserted at this point.
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kAlternativePlc, neteq_->last_operation_for_test());
}
// The decoder is registered so that it reports AudioDecoder::kComfortNoise.
// After one inserted packet and the 12 GetAudio() calls of GetFirstPacket(),
// one further GetAudio() call must succeed and be recorded as
// kCodecInternalCng.
TEST_F(NetEqImplTest120ms, CodecInternalCng) {
CreateInstanceNoMocks();
Register120msCodec(AudioDecoder::kComfortNoise);
InsertPacket(first_timestamp());
GetFirstPacket();
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kCodecInternalCng, neteq_->last_operation_for_test());
}
// Insert a single 120 ms speech packet and call GetAudio() 12 times via
// GetFirstPacket(); the operation recorded by the last of those calls must be
// kNormal.
TEST_F(NetEqImplTest120ms, Normal) {
CreateInstanceNoMocks();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
EXPECT_EQ(kNormal, neteq_->last_operation_for_test());
}
// Verifies that a kMerge operation is recorded when a packet arrives after a
// gap. Uses the mocked delay manager so that TargetLevel() can be controlled.
TEST_F(NetEqImplTest120ms, Merge) {
CreateInstanceWithDelayManagerMock();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// One more GetAudio() call while no further packet has been inserted.
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
// The next packet is stamped two packet intervals after the first one; the
// packet that would sit one interval after the first is never inserted.
InsertPacket(first_timestamp() + 2 * timestamp_diff_between_packets());
// The delay manager reports a target level which should cause a Merge.
EXPECT_CALL(*mock_delay_manager_, TargetLevel()).WillOnce(Return(-10));
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kMerge, neteq_->last_operation_for_test());
}
// With the default playout mode: insert a single 120 ms speech packet, call
// GetAudio() 12 times via GetFirstPacket(), and verify that one further
// GetAudio() call succeeds and is recorded as a kExpand operation.
TEST_F(NetEqImplTest120ms, Expand) {
CreateInstanceNoMocks();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// No further packet has been inserted at this point.
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kExpand, neteq_->last_operation_for_test());
}
// Verifies that kFastAccelerate is recorded when the mocked delay manager
// writes 0 to both BufferLimits() output arguments.
TEST_F(NetEqImplTest120ms, FastAccelerate) {
CreateInstanceWithDelayManagerMock();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// Insert the packet that directly follows the first one.
InsertPacket(first_timestamp() + timestamp_diff_between_packets());
// The delay manager reports buffer limits which should cause a
// FastAccelerate.
EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
.Times(1)
.WillOnce(DoAll(SetArgPointee<0>(0), SetArgPointee<1>(0)));
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kFastAccelerate, neteq_->last_operation_for_test());
}
// Verifies that kPreemptiveExpand is recorded when the mocked delay manager
// writes 100 to both BufferLimits() output arguments.
TEST_F(NetEqImplTest120ms, PreemptiveExpand) {
CreateInstanceWithDelayManagerMock();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// Insert the packet that directly follows the first one.
InsertPacket(first_timestamp() + timestamp_diff_between_packets());
// The delay manager reports buffer limits which should cause a
// PreemptiveExpand.
EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
.Times(1)
.WillOnce(DoAll(SetArgPointee<0>(100), SetArgPointee<1>(100)));
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kPreemptiveExpand, neteq_->last_operation_for_test());
}
// Verifies that kAccelerate is recorded when the mocked delay manager writes
// 1 and 2 to the first and second BufferLimits() output arguments.
TEST_F(NetEqImplTest120ms, Accelerate) {
CreateInstanceWithDelayManagerMock();
Register120msCodec(AudioDecoder::kSpeech);
InsertPacket(first_timestamp());
GetFirstPacket();
// Insert the packet that directly follows the first one.
InsertPacket(first_timestamp() + timestamp_diff_between_packets());
// The delay manager reports buffer limits which should cause an Accelerate.
EXPECT_CALL(*mock_delay_manager_, BufferLimits(_, _))
.Times(1)
.WillOnce(DoAll(SetArgPointee<0>(1), SetArgPointee<1>(2)));
EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output_));
EXPECT_EQ(kAccelerate, neteq_->last_operation_for_test());
}
}// namespace webrtc }// namespace webrtc

View File

@ -27,9 +27,20 @@
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h" #include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
using ::testing::_; using ::testing::_;
using ::testing::Invoke;
namespace webrtc { namespace webrtc {
namespace {

// Action installed on the mocked Expand::Process() in the 120 ms test below.
// Copies a freshly constructed AudioMultiVector(1, 11520) into |output| via
// CopyTo() and returns 0.
int ExpandProcess120ms(AudioMultiVector* output) {
  const size_t first_ctor_arg = 1;
  const size_t second_ctor_arg = 11520u;
  AudioMultiVector expand_result(first_ctor_arg, second_ctor_arg);
  expand_result.CopyTo(output);
  return 0;
}

}  // namespace
TEST(Normal, CreateAndDestroy) { TEST(Normal, CreateAndDestroy) {
MockDecoderDatabase db; MockDecoderDatabase db;
int fs = 8000; int fs = 8000;
@ -121,6 +132,45 @@ TEST(Normal, InputLengthAndChannelsDoNotMatch) {
EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope. EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope.
} }
// Runs Normal::Process() on an 11520-sample, single-channel input with the
// last mode set to kModeExpand, using a mocked Expand whose Process() is
// replaced by ExpandProcess120ms(). Verifies that the call returns the input
// length and that |output| ends up holding that many samples.
TEST(Normal, LastModeExpand120msPacket) {
WebRtcSpl_Init();
MockDecoderDatabase db;
const int kFs = 48000;
// NOTE(review): despite its name this constant is used as a number of
// int16_t samples (array size and |length| argument below), not bytes.
const size_t kPacketsizeBytes = 11520u;
const size_t kChannels = 1;
BackgroundNoise bgn(kChannels);
SyncBuffer sync_buffer(kChannels, 1000);
RandomVector random_vector;
StatisticsCalculator statistics;
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
kChannels);
Normal normal(kFs, &db, bgn, &expand);
// All-zero input signal.
int16_t input[kPacketsizeBytes] = {0};
// One mute factor per channel, each initialized to 16384.
std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[kChannels]);
for (size_t i = 0; i < kChannels; ++i) {
mute_factor_array[i] = 16384;
}
AudioMultiVector output(kChannels);
// Calls expected on the mocked Expand while Normal::Process() runs.
EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
EXPECT_CALL(expand, Reset());
EXPECT_EQ(static_cast<int>(kPacketsizeBytes),
normal.Process(input,
kPacketsizeBytes,
kModeExpand,
mute_factor_array.get(),
&output));
EXPECT_EQ(kPacketsizeBytes, output.Size());
EXPECT_CALL(db, Die()); // Called when |db| goes out of scope.
EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope.
}
// TODO(hlundin): Write more tests. // TODO(hlundin): Write more tests.
} // namespace webrtc } // namespace webrtc