Split iSAC encoder/decoder: Test more cases (and make sure they work)

This patch tests separate iSAC encoder and decoder in more cases (32
kHz in addition to 16 kHz, and 30 ms adaptive and 60 ms nonadaptive).

In order to handle 32 kHz adaptive, the decoder needs to be told of
the encoder's sample rate (16 kHz worked already because that's the
default). And since we can't set the encoder's frame size without also
setting its bit rate, we need a way to set the decoder's bit rate as
well.

It turned out to be way too messy to continue verifying that the
bandwidth estimator does something reasonable in all these cases,
because it seems it doesn't. So the GetSetBandwidthInfo test is now just
responsible for ensuring that a split encoder/decoder behaves the same
as a conjoined encoder/decoder; the job of verifying that the bandwidth
estimator does its job properly falls on some other test (that doesn't
exist yet).

Review URL: https://codereview.webrtc.org/1225093005

Cr-Commit-Position: refs/heads/master@{#9583}
This commit is contained in:
kwiberg
2015-07-14 18:54:36 -07:00
committed by Commit bot
parent 2d3b7e2173
commit 3258db26ed
7 changed files with 185 additions and 153 deletions

View File

@ -93,6 +93,15 @@ struct IsacFix {
DCHECK_EQ(sample_rate_hz, kFixSampleRate);
return 0;
}
// iSAC-fix version of the "tell the decoder the encoder's sample rate" hook.
// Nothing is stored in |inst|; the call only DCHECKs that |sample_rate_hz|
// equals kFixSampleRate.
static inline void SetEncSampRateInDecoder(instance_type* inst,
uint16_t sample_rate_hz) {
DCHECK_EQ(sample_rate_hz, kFixSampleRate);
}
// Forwards |inst| and |bottleneck_bits_per_second| unchanged to
// WebRtcIsacfix_SetInitialBweBottleneck().
static inline void SetInitialBweBottleneck(
instance_type* inst,
int bottleneck_bits_per_second) {
WebRtcIsacfix_SetInitialBweBottleneck(inst, bottleneck_bits_per_second);
}
static inline int16_t UpdateBwEstimate(instance_type* inst,
const uint8_t* encoded,
int32_t packet_size,

View File

@ -379,7 +379,8 @@ extern "C" {
int16_t rate,
int framesize);
/* Seeds the send-bandwidth average of the instance's bandwidth estimator from
   |bottleneck_bits_per_second|. The value must lie in [10000, 32000]; the
   implementation checks this with assert(). */
void WebRtcIsacfix_SetInitialBweBottleneck(ISACFIX_MainStruct* ISAC_main_inst,
int bottleneck_bits_per_second);
/****************************************************************************
* WebRtcIsacfix_ControlBwe(...)

View File

@ -1110,6 +1110,13 @@ int16_t WebRtcIsacfix_Control(ISACFIX_MainStruct *ISAC_main_inst,
return 0;
}
/* Seeds the bandwidth estimator's send-bandwidth average.
 *
 * Input:
 *   - ISAC_main_inst             : iSAC-fix instance to update.
 *   - bottleneck_bits_per_second : initial bottleneck; asserted to be within
 *                                  [10000, 32000].
 */
void WebRtcIsacfix_SetInitialBweBottleneck(ISACFIX_MainStruct* ISAC_main_inst,
                                           int bottleneck_bits_per_second) {
  ISACFIX_SubStruct* sub_inst;
  uint32_t scaled_bottleneck;

  assert(bottleneck_bits_per_second >= 10000 &&
         bottleneck_bits_per_second <= 32000);

  sub_inst = (ISACFIX_SubStruct*)ISAC_main_inst;

  /* The estimator keeps this value left-shifted by 7 bits (presumably its
     fixed-point representation -- confirm against the estimator code). */
  scaled_bottleneck = (uint32_t)bottleneck_bits_per_second;
  scaled_bottleneck <<= 7;
  sub_inst->bwestimator_obj.sendBwAvg = scaled_bottleneck;
}
/****************************************************************************
* WebRtcIsacfix_ControlBwe(...)

View File

@ -91,6 +91,15 @@ struct IsacFloat {
uint16_t sample_rate_hz) {
return WebRtcIsac_SetEncSampRate(inst, sample_rate_hz);
}
// Forwards |inst| and |sample_rate_hz| unchanged to
// WebRtcIsac_SetEncSampRateInDecoder().
static inline void SetEncSampRateInDecoder(instance_type* inst,
uint16_t sample_rate_hz) {
WebRtcIsac_SetEncSampRateInDecoder(inst, sample_rate_hz);
}
// Forwards |inst| and |bottleneck_bits_per_second| unchanged to
// WebRtcIsac_SetInitialBweBottleneck().
static inline void SetInitialBweBottleneck(
instance_type* inst,
int bottleneck_bits_per_second) {
WebRtcIsac_SetInitialBweBottleneck(inst, bottleneck_bits_per_second);
}
static inline int16_t UpdateBwEstimate(instance_type* inst,
const uint8_t* encoded,
int32_t packet_size,

View File

@ -269,6 +269,8 @@ extern "C" {
int32_t rate,
int framesize);
/* Sets the send-bandwidth average of the instance's bandwidth estimator to
   |bottleneck_bits_per_second|. The value must lie in [10000, 32000]; the
   implementation checks this with assert(). */
void WebRtcIsac_SetInitialBweBottleneck(ISACStruct* ISAC_main_inst,
int bottleneck_bits_per_second);
/******************************************************************************
* WebRtcIsac_ControlBwe(...)
@ -706,13 +708,18 @@ extern "C" {
int16_t* decoded,
int16_t* speechType);
/* Fills in an IsacBandwidthInfo struct. */
/* Fills in an IsacBandwidthInfo struct. |inst| should be a decoder. */
void WebRtcIsac_GetBandwidthInfo(ISACStruct* inst, IsacBandwidthInfo* bwinfo);
/* Uses the values from an IsacBandwidthInfo struct. */
/* Uses the values from an IsacBandwidthInfo struct. |inst| should be an
encoder. */
void WebRtcIsac_SetBandwidthInfo(ISACStruct* inst,
const IsacBandwidthInfo* bwinfo);
/* If |inst| is a decoder but not an encoder: tell it what sample rate the
encoder is using, for bandwidth estimation purposes. */
void WebRtcIsac_SetEncSampRateInDecoder(ISACStruct* inst, int sample_rate_hz);
#if defined(__cplusplus)
}
#endif

View File

@ -1578,6 +1578,13 @@ int16_t WebRtcIsac_Control(ISACStruct* ISAC_main_inst,
return 0;
}
/* Seeds the bandwidth estimator's send-bandwidth average.
 *
 * Input:
 *   - ISAC_main_inst             : iSAC instance to update.
 *   - bottleneck_bits_per_second : initial bottleneck; asserted to be within
 *                                  [10000, 32000].
 */
void WebRtcIsac_SetInitialBweBottleneck(ISACStruct* ISAC_main_inst,
                                        int bottleneck_bits_per_second) {
  ISACMainStruct* main_inst;

  assert(bottleneck_bits_per_second >= 10000 &&
         bottleneck_bits_per_second <= 32000);

  main_inst = (ISACMainStruct*)ISAC_main_inst;
  /* The float estimator stores the rate directly, in bits per second. */
  main_inst->bwestimator_obj.send_bw_avg = (float)bottleneck_bits_per_second;
}
/****************************************************************************
* WebRtcIsac_ControlBwe(...)
@ -2399,3 +2406,12 @@ void WebRtcIsac_SetBandwidthInfo(ISACStruct* inst,
assert(instISAC->initFlag & BIT_MASK_ENC_INIT);
WebRtcIsacBw_SetBandwidthInfo(&instISAC->bwestimator_obj, bwinfo);
}
/* Records the encoder's sample rate in |inst|.
 *
 * Preconditions, each checked with assert(): BIT_MASK_DEC_INIT is set in the
 * instance's initFlag, BIT_MASK_ENC_INIT is not set, and |sample_rate_hz| is
 * either 16000 or 32000. */
void WebRtcIsac_SetEncSampRateInDecoder(ISACStruct* inst,
int sample_rate_hz) {
ISACMainStruct* instISAC = (ISACMainStruct*)inst;
assert(instISAC->initFlag & BIT_MASK_DEC_INIT);
assert(!(instISAC->initFlag & BIT_MASK_ENC_INIT));
assert(sample_rate_hz == 16000 || sample_rate_hz == 32000);
/* The field is expressed in kHz, hence the division by 1000. */
instISAC->encoderSamplingRateKHz = sample_rate_hz / 1000;
}

View File

@ -24,10 +24,11 @@ namespace webrtc {
namespace {
const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz
std::vector<int16_t> LoadSpeechData() {
webrtc::test::InputAudioFile input_file(
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
static const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz
std::vector<int16_t> speech_data(kIsacNumberOfSamples);
input_file.Read(kIsacNumberOfSamples, speech_data.data());
return speech_data;
@ -41,32 +42,45 @@ IsacBandwidthInfo GetBwInfo(typename T::instance_type* inst) {
return bi;
}
// Encodes one packet. Returns the packet duration in milliseconds.
template <typename T>
rtc::Buffer EncodePacket(typename T::instance_type* inst,
const IsacBandwidthInfo* bi,
const int16_t* speech_data,
int framesize_ms) {
rtc::Buffer output(1000);
for (int i = 0;; ++i) {
int EncodePacket(typename T::instance_type* inst,
const IsacBandwidthInfo* bi,
const int16_t* speech_data,
rtc::Buffer* output) {
output->SetSize(1000);
for (int duration_ms = 10;; duration_ms += 10) {
if (bi)
T::SetBandwidthInfo(inst, bi);
int encoded_bytes = T::Encode(inst, speech_data, output.data());
if (i + 1 == framesize_ms / 10) {
int encoded_bytes = T::Encode(inst, speech_data, output->data());
if (encoded_bytes > 0 || duration_ms >= 60) {
EXPECT_GT(encoded_bytes, 0);
EXPECT_LE(static_cast<size_t>(encoded_bytes), output.size());
output.SetSize(encoded_bytes);
return output;
EXPECT_LE(static_cast<size_t>(encoded_bytes), output->size());
output->SetSize(encoded_bytes);
return duration_ms;
}
EXPECT_EQ(0, encoded_bytes);
}
}
// Decodes one encoded packet with |inst| and returns the decoded samples.
// The output buffer starts out with room for kIsacNumberOfSamples samples and
// is shrunk to the number of samples that T::DecodeInternal reports. The
// sample count is expected to be positive and to fit in the buffer.
template <typename T>
std::vector<int16_t> DecodePacket(typename T::instance_type* inst,
                                  const rtc::Buffer& encoded) {
  std::vector<int16_t> decoded(kIsacNumberOfSamples);
  int16_t speech_type;
  int16_t* const out = decoded.data();
  const int nsamples = T::DecodeInternal(inst, encoded.data(), encoded.size(),
                                         out, &speech_type);
  EXPECT_GT(nsamples, 0);
  EXPECT_LE(static_cast<size_t>(nsamples), decoded.size());
  decoded.resize(nsamples);
  return decoded;
}
class BoundedCapacityChannel final {
public:
BoundedCapacityChannel(int rate_bits_per_second)
BoundedCapacityChannel(int sample_rate_hz, int rate_bits_per_second)
: current_time_rtp_(0),
channel_rate_bytes_per_sample_(rate_bits_per_second /
(8.0 * kSamplesPerSecond)) {}
(8.0 * sample_rate_hz)) {}
// Simulate sending the given number of bytes at the given RTP time. Returns
// the new current RTP time after the sending is done.
@ -81,47 +95,6 @@ class BoundedCapacityChannel final {
// The somewhat strange unit for channel rate, bytes per sample, is because
// RTP time is measured in samples:
const double channel_rate_bytes_per_sample_;
static const int kSamplesPerSecond = 16000;
};
// Per-codec (T) and per-mode (adaptive) constants used by
// TestGetSetBandwidthInfo:
//  - time_to_settle: number of initial packets that are left out of the
//    packet-size statistics (the test encodes time_to_settle + 200 packets).
//  - ExpectedRateBitsPerSecond(): maps the channel rate to the average send
//    rate that the test expects to measure.
// The primary template is empty; only the four specializations below are used.
template <typename T, bool adaptive>
struct TestParam {};
// Float codec, adaptive mode: the expected rate is the channel rate itself.
template <>
struct TestParam<IsacFloat, true> {
static const int time_to_settle = 200;
static int ExpectedRateBitsPerSecond(int rate_bits_per_second) {
return rate_bits_per_second;
}
};
// Fix codec, adaptive mode: the expected rate is a constant (see below).
template <>
struct TestParam<IsacFix, true> {
static const int time_to_settle = 350;
static int ExpectedRateBitsPerSecond(int rate_bits_per_second) {
// For some reason, IsacFix fails to adapt to the channel's actual
// bandwidth. Instead, it settles on a few hundred packets at 10kbit/s,
// then a few hundred at 5kbit/s, then a few hundred at 10kbit/s, and so
// on. The 200 packets starting at 350 are in the middle of the first
// 10kbit/s run.
return 10000;
}
};
// Float codec, non-adaptive mode: the expected rate ignores the channel rate.
template <>
struct TestParam<IsacFloat, false> {
static const int time_to_settle = 0;
static int ExpectedRateBitsPerSecond(int rate_bits_per_second) {
return 32000;
}
};
// Fix codec, non-adaptive mode: the expected rate ignores the channel rate.
template <>
struct TestParam<IsacFix, false> {
static const int time_to_settle = 0;
static int ExpectedRateBitsPerSecond(int rate_bits_per_second) {
return 16000;
}
};
// Test that the iSAC encoder produces identical output whether or not we use a
@ -129,143 +102,153 @@ struct TestParam<IsacFix, false> {
// communicate BW estimation info explicitly.
template <typename T, bool adaptive>
void TestGetSetBandwidthInfo(const int16_t* speech_data,
int rate_bits_per_second) {
using Param = TestParam<T, adaptive>;
const int framesize_ms = adaptive ? 60 : 30;
int rate_bits_per_second,
int sample_rate_hz,
int frame_size_ms) {
const int bit_rate = 32000;
// Conjoined encoder/decoder pair:
typename T::instance_type* encdec;
ASSERT_EQ(0, T::Create(&encdec));
ASSERT_EQ(0, T::EncoderInit(encdec, adaptive ? 0 : 1));
ASSERT_EQ(0, T::DecoderInit(encdec));
ASSERT_EQ(0, T::SetEncSampRate(encdec, sample_rate_hz));
if (adaptive)
ASSERT_EQ(0, T::ControlBwe(encdec, bit_rate, frame_size_ms, false));
else
ASSERT_EQ(0, T::Control(encdec, bit_rate, frame_size_ms));
// Disjoint encoder/decoder pair:
typename T::instance_type* enc;
ASSERT_EQ(0, T::Create(&enc));
ASSERT_EQ(0, T::EncoderInit(enc, adaptive ? 0 : 1));
ASSERT_EQ(0, T::SetEncSampRate(enc, sample_rate_hz));
if (adaptive)
ASSERT_EQ(0, T::ControlBwe(enc, bit_rate, frame_size_ms, false));
else
ASSERT_EQ(0, T::Control(enc, bit_rate, frame_size_ms));
typename T::instance_type* dec;
ASSERT_EQ(0, T::Create(&dec));
ASSERT_EQ(0, T::DecoderInit(dec));
T::SetInitialBweBottleneck(dec, bit_rate);
T::SetEncSampRateInDecoder(dec, sample_rate_hz);
// 0. Get initial BW info from decoder.
auto bi = GetBwInfo<T>(dec);
BoundedCapacityChannel channel1(rate_bits_per_second),
channel2(rate_bits_per_second);
std::vector<size_t> packet_sizes;
for (int i = 0; i < Param::time_to_settle + 200; ++i) {
BoundedCapacityChannel channel1(sample_rate_hz, rate_bits_per_second),
channel2(sample_rate_hz, rate_bits_per_second);
int elapsed_time_ms = 0;
for (int i = 0; elapsed_time_ms < 10000; ++i) {
std::ostringstream ss;
ss << " i = " << i;
SCOPED_TRACE(ss.str());
// 1. Encode 6 * 10 ms (adaptive) or 3 * 10 ms (nonadaptive). The separate
// encoder is given the BW info before each encode call.
auto bitstream1 =
EncodePacket<T>(encdec, nullptr, speech_data, framesize_ms);
auto bitstream2 = EncodePacket<T>(enc, &bi, speech_data, framesize_ms);
// 1. Encode 3 * 10 ms or 6 * 10 ms. The separate encoder is given the BW
// info before each encode call.
rtc::Buffer bitstream1, bitstream2;
int duration1_ms =
EncodePacket<T>(encdec, nullptr, speech_data, &bitstream1);
int duration2_ms = EncodePacket<T>(enc, &bi, speech_data, &bitstream2);
EXPECT_EQ(duration1_ms, duration2_ms);
if (adaptive)
EXPECT_TRUE(duration1_ms == 30 || duration1_ms == 60);
else
EXPECT_EQ(frame_size_ms, duration1_ms);
ASSERT_EQ(bitstream1.size(), bitstream2.size());
EXPECT_EQ(bitstream1, bitstream2);
if (i > Param::time_to_settle)
packet_sizes.push_back(bitstream1.size());
// 2. Deliver the encoded data to the decoders (but don't actually ask them
// to decode it; that's not necessary). Then get new BW info from the
// separate decoder.
const int samples_per_packet = 16 * framesize_ms;
const int send_time = i * samples_per_packet;
// 2. Deliver the encoded data to the decoders.
const int send_time = elapsed_time_ms * (sample_rate_hz / 1000);
EXPECT_EQ(0, T::UpdateBwEstimate(
encdec, bitstream1.data(), bitstream1.size(), i, send_time,
channel1.Send(send_time, bitstream1.size())));
EXPECT_EQ(0, T::UpdateBwEstimate(
dec, bitstream2.data(), bitstream2.size(), i, send_time,
channel2.Send(send_time, bitstream2.size())));
// 3. Decode, and get new BW info from the separate decoder.
ASSERT_EQ(0, T::SetDecSampRate(encdec, sample_rate_hz));
ASSERT_EQ(0, T::SetDecSampRate(dec, sample_rate_hz));
auto decoded1 = DecodePacket<T>(encdec, bitstream1);
auto decoded2 = DecodePacket<T>(dec, bitstream2);
EXPECT_EQ(decoded1, decoded2);
bi = GetBwInfo<T>(dec);
elapsed_time_ms += duration1_ms;
}
EXPECT_EQ(0, T::Free(encdec));
EXPECT_EQ(0, T::Free(enc));
EXPECT_EQ(0, T::Free(dec));
// The average send bitrate is close to the channel's capacity.
double avg_size =
std::accumulate(packet_sizes.begin(), packet_sizes.end(), 0) /
static_cast<double>(packet_sizes.size());
double avg_rate_bits_per_second = 8.0 * avg_size / (framesize_ms * 1e-3);
double expected_rate_bits_per_second =
Param::ExpectedRateBitsPerSecond(rate_bits_per_second);
EXPECT_GT(avg_rate_bits_per_second / expected_rate_bits_per_second, 0.95);
EXPECT_LT(avg_rate_bits_per_second / expected_rate_bits_per_second, 1.06);
// The largest packet isn't that large, and the smallest not that small.
size_t min_size = *std::min_element(packet_sizes.begin(), packet_sizes.end());
size_t max_size = *std::max_element(packet_sizes.begin(), packet_sizes.end());
double size_range = max_size - min_size;
EXPECT_LE(size_range / avg_size, 0.16);
}
// Which iSAC implementation a test case exercises.
enum class IsacType { Fix, Float };

// Streams a short lowercase label for |t|: "fix" for IsacType::Fix and
// "float" for every other value.
std::ostream& operator<<(std::ostream& os, IsacType t) {
  if (t == IsacType::Fix)
    return os << "fix";
  return os << "float";
}
// One parameter set for the parameterized GetSetBandwidthInfo test. Kept as a
// plain aggregate so that it can be brace-initialized field by field.
struct IsacTestParam {
  IsacType isac_type;
  bool adaptive;
  int channel_rate_bits_per_second;
  int sample_rate_hz;
  int frame_size_ms;

  // Prints the parameter set as
  // {type,adaptive|nonadaptive,channel rate,sample rate,frame size}.
  friend std::ostream& operator<<(std::ostream& os, const IsacTestParam& itp) {
    const char* const mode = itp.adaptive ? "adaptive" : "nonadaptive";
    os << '{' << itp.isac_type << ',' << mode << ',';
    os << itp.channel_rate_bits_per_second << ',';
    os << itp.sample_rate_hz << ',';
    os << itp.frame_size_ms << '}';
    return os;
  }
};
// Test fixture whose parameter type is IsacTestParam; it adds no members of
// its own.
class IsacCommonTest : public testing::TestWithParam<IsacTestParam> {};
} // namespace
TEST(IsacCommonTest, GetSetBandwidthInfoFloat12kAdaptive) {
TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 12000);
// Runs TestGetSetBandwidthInfo for the current parameter set. The codec type
// and adaptive flag are template arguments of the test function, so the
// run-time parameter values are mapped onto the matching instantiation here.
TEST_P(IsacCommonTest, GetSetBandwidthInfo) {
  // Signature shared by all four TestGetSetBandwidthInfo instantiations.
  using TestFunction = void (*)(const int16_t*, int, int, int);

  const auto p = GetParam();
  const bool is_fix = p.isac_type == IsacType::Fix;

  TestFunction test_fun = nullptr;
  if (is_fix && p.adaptive)
    test_fun = TestGetSetBandwidthInfo<IsacFix, true>;
  else if (is_fix)
    test_fun = TestGetSetBandwidthInfo<IsacFix, false>;
  else if (p.adaptive)
    test_fun = TestGetSetBandwidthInfo<IsacFloat, true>;
  else
    test_fun = TestGetSetBandwidthInfo<IsacFloat, false>;

  const std::vector<int16_t> speech_data = LoadSpeechData();
  test_fun(speech_data.data(), p.channel_rate_bits_per_second,
           p.sample_rate_hz, p.frame_size_ms);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat15kAdaptive) {
TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 15000);
std::vector<IsacTestParam> TestCases() {
static const IsacType types[] = {IsacType::Fix, IsacType::Float};
static const bool adaptives[] = {true, false};
static const int channel_rates[] = {12000, 15000, 19000, 22000};
static const int sample_rates[] = {16000, 32000};
static const int frame_sizes[] = {30, 60};
std::vector<IsacTestParam> cases;
for (IsacType type : types)
for (bool adaptive : adaptives)
for (int channel_rate : channel_rates)
for (int sample_rate : sample_rates)
if (!(type == IsacType::Fix && sample_rate == 32000))
for (int frame_size : frame_sizes)
if (!(sample_rate == 32000 && frame_size == 60))
cases.push_back(
{type, adaptive, channel_rate, sample_rate, frame_size});
return cases;
}
// Hand-written test cases, one per combination of codec (Float/Fix), channel
// rate (12k/15k/19k/22k) and mode ("Adaptive" suffix = adaptive, no suffix =
// non-adaptive). Each one calls the matching TestGetSetBandwidthInfo
// instantiation with the speech data from LoadSpeechData() and the channel
// rate in bits per second.
TEST(IsacCommonTest, GetSetBandwidthInfoFloat19kAdaptive) {
TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 19000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat22kAdaptive) {
TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 22000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix12kAdaptive) {
TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 12000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix15kAdaptive) {
TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 15000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix19kAdaptive) {
TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 19000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix22kAdaptive) {
TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 22000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat12k) {
TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 12000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat15k) {
TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 15000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat19k) {
TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 19000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFloat22k) {
TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 22000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix12k) {
TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 12000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix15k) {
TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 15000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix19k) {
TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 19000);
}
TEST(IsacCommonTest, GetSetBandwidthInfoFix22k) {
TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 22000);
}
// Instantiates the IsacCommonTest parameterized tests, with an empty
// instantiation-name prefix, for every parameter set returned by TestCases().
INSTANTIATE_TEST_CASE_P(, IsacCommonTest, testing::ValuesIn(TestCases()));
} // namespace webrtc