Implement audio_interruption metric for kCodecPlc

Audio interruption metric is not implemented for codecs doing their own PLC. R=ivoc@webrtc.org, jakobi@webrtc.org Bug: b/177523033 webrtc:12456 Change-Id: I0aca6fa5c0ff617e76ee1e4ed8d95703c7097223 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/206561 Reviewed-by: Oskar Sundbom <ossu@webrtc.org> Reviewed-by: Christoffer Rodbro <crodbro@webrtc.org> Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org> Reviewed-by: Ivo Creusen <ivoc@webrtc.org> Commit-Queue: Pablo Barrera González <barrerap@google.com> Cr-Commit-Position: refs/heads/master@{#33229}
2021-02-10 10:38:50 +01:00
parent 983627c88d
commit ff0e01f689
6 changed files with 146 additions and 36 deletions
--- a/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc
@ -10,7 +10,6 @@

 // Test to verify correct operation when using the decoder-internal PLC.

-#include <algorithm>
 #include <memory>
 #include <utility>
 #include <vector>
@ -33,6 +32,9 @@ namespace webrtc {
 namespace test {
 namespace {

+constexpr int kSampleRateHz = 32000;
+constexpr int kRunTimeMs = 10000;
+
 // This class implements a fake decoder. The decoder will read audio from a file
 // and present as output, both for regular decoding and for PLC.
 class AudioDecoderPlc : public AudioDecoder {
@ -48,7 +50,8 @@ class AudioDecoderPlc : public AudioDecoder {
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override {
-    RTC_CHECK_EQ(encoded_len / 2, 20 * sample_rate_hz_ / 1000);
+    RTC_CHECK_GE(encoded_len / 2, 10 * sample_rate_hz_ / 1000);
+    RTC_CHECK_LE(encoded_len / 2, 2 * 10 * sample_rate_hz_ / 1000);
    RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_);
    RTC_CHECK(decoded);
    RTC_CHECK(speech_type);
@ -60,17 +63,21 @@ class AudioDecoderPlc : public AudioDecoder {

  void GeneratePlc(size_t requested_samples_per_channel,
                   rtc::BufferT<int16_t>* concealment_audio) override {
+    // Instead of generating random data for GeneratePlc we use the same data as
+    // the input, so we can check that we produce the same result independently
+    // of the losses.
+    RTC_DCHECK_EQ(requested_samples_per_channel, 10 * sample_rate_hz_ / 1000);
+
    // Must keep a local copy of this since DecodeInternal sets it to false.
    const bool last_was_plc = last_was_plc_;
-    SpeechType speech_type;
+
    std::vector<int16_t> decoded(5760);
-    int dec_len = DecodeInternal(nullptr, 2 * 20 * sample_rate_hz_ / 1000,
+    SpeechType speech_type;
+    int dec_len = DecodeInternal(nullptr, 2 * 10 * sample_rate_hz_ / 1000,
                                 sample_rate_hz_, decoded.data(), &speech_type);
-    // This fake decoder can only generate 20 ms of PLC data each time. Make
-    // sure the caller didn't ask for more.
-    RTC_CHECK_GE(dec_len, requested_samples_per_channel);
    concealment_audio->AppendData(decoded.data(), dec_len);
    concealed_samples_ += rtc::checked_cast<size_t>(dec_len);
+
    if (!last_was_plc) {
      ++concealment_events_;
    }
@ -103,11 +110,15 @@ class ZeroSampleGenerator : public EncodeNetEqInput::Generator {
 };

 // A NetEqInput which connects to another NetEqInput, but drops a number of
-// packets on the way.
+// consecutive packets on the way
 class LossyInput : public NetEqInput {
 public:
-  LossyInput(int loss_cadence, std::unique_ptr<NetEqInput> input)
-      : loss_cadence_(loss_cadence), input_(std::move(input)) {}
+  LossyInput(int loss_cadence,
+             int burst_length,
+             std::unique_ptr<NetEqInput> input)
+      : loss_cadence_(loss_cadence),
+        burst_length_(burst_length),
+        input_(std::move(input)) {}

  absl::optional<int64_t> NextPacketTime() const override {
    return input_->NextPacketTime();
@ -119,8 +130,12 @@ class LossyInput : public NetEqInput {

  std::unique_ptr<PacketData> PopPacket() override {
    if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) {
-      // Pop one extra packet to create the loss.
-      input_->PopPacket();
+      // Pop `burst_length_` packets to create the loss.
+      auto packet_to_return = input_->PopPacket();
+      for (int i = 0; i < burst_length_; i++) {
+        input_->PopPacket();
+      }
+      return packet_to_return;
    }
    return input_->PopPacket();
  }
@ -135,6 +150,7 @@ class LossyInput : public NetEqInput {

 private:
  const int loss_cadence_;
+  const int burst_length_;
  int count_ = 0;
  const std::unique_ptr<NetEqInput> input_;
 };
@ -149,7 +165,14 @@ class AudioChecksumWithOutput : public AudioChecksum {
  std::string& output_str_;
 };

-NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) {
+struct TestStatistics {
+  NetEqNetworkStatistics network;
+  NetEqLifetimeStatistics lifetime;
+};
+
+TestStatistics RunTest(int loss_cadence,
+                       int burst_length,
+                       std::string* checksum) {
  NetEq::Config config;
  config.for_test_no_time_stretching = true;

@ -157,20 +180,18 @@ NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) {
  // but the actual encoded samples will never be used by the decoder in the
  // test. See below about the decoder.
  auto generator = std::make_unique<ZeroSampleGenerator>();
-  constexpr int kSampleRateHz = 32000;
  constexpr int kPayloadType = 100;
  AudioEncoderPcm16B::Config encoder_config;
  encoder_config.sample_rate_hz = kSampleRateHz;
  encoder_config.payload_type = kPayloadType;
  auto encoder = std::make_unique<AudioEncoderPcm16B>(encoder_config);
-  constexpr int kRunTimeMs = 10000;
  auto input = std::make_unique<EncodeNetEqInput>(
      std::move(generator), std::move(encoder), kRunTimeMs);
  // Wrap the input in a loss function.
-  auto lossy_input =
-      std::make_unique<LossyInput>(loss_cadence, std::move(input));
+  auto lossy_input = std::make_unique<LossyInput>(loss_cadence, burst_length,
+                                                  std::move(input));

-  // Settinng up decoders.
+  // Setting up decoders.
  NetEqTest::DecoderMap decoders;
  // Using a fake decoder which simply reads the output audio from a file.
  auto input_file = std::make_unique<InputAudioFile>(
@ -195,24 +216,98 @@ NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) {
  auto lifetime_stats = neteq_test.LifetimeStats();
  EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples);
  EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events);
-
-  return neteq_test.SimulationStats();
+  return {neteq_test.SimulationStats(), neteq_test.LifetimeStats()};
 }
 }  // namespace

-TEST(NetEqDecoderPlc, Test) {
+// Check that some basic metrics are produced in the right direction. In
+// particular, expand_rate should only increase if there are losses present. Our
+// dummy decoder is designed such as the checksum should always be the same
+// regardless of the losses given that calls are executed in the right order.
+TEST(NetEqDecoderPlc, BasicMetrics) {
  std::string checksum;
-  auto stats = RunTest(10, &checksum);
+
+  // Drop 1 packet every 10 packets.
+  auto stats = RunTest(10, 1, &checksum);

  std::string checksum_no_loss;
-  auto stats_no_loss = RunTest(0, &checksum_no_loss);
+  auto stats_no_loss = RunTest(0, 0, &checksum_no_loss);

  EXPECT_EQ(checksum, checksum_no_loss);

-  EXPECT_EQ(stats.preemptive_rate, stats_no_loss.preemptive_rate);
-  EXPECT_EQ(stats.accelerate_rate, stats_no_loss.accelerate_rate);
-  EXPECT_EQ(0, stats_no_loss.expand_rate);
-  EXPECT_GT(stats.expand_rate, 0);
+  EXPECT_EQ(stats.network.preemptive_rate,
+            stats_no_loss.network.preemptive_rate);
+  EXPECT_EQ(stats.network.accelerate_rate,
+            stats_no_loss.network.accelerate_rate);
+  EXPECT_EQ(0, stats_no_loss.network.expand_rate);
+  EXPECT_GT(stats.network.expand_rate, 0);
+}
+
+// Checks that interruptions are not counted in small losses but they are
+// correctly counted in long interruptions.
+TEST(NetEqDecoderPlc, CountInterruptions) {
+  std::string checksum;
+  std::string checksum_2;
+  std::string checksum_3;
+
+  // Half of the packets lost but in short interruptions.
+  auto stats_no_interruptions = RunTest(1, 1, &checksum);
+  // One lost of 500 ms (250 packets).
+  auto stats_one_interruption = RunTest(200, 250, &checksum_2);
+  // Two losses of 250ms each (125 packets).
+  auto stats_two_interruptions = RunTest(125, 125, &checksum_3);
+
+  EXPECT_EQ(checksum, checksum_2);
+  EXPECT_EQ(checksum, checksum_3);
+  EXPECT_GT(stats_no_interruptions.network.expand_rate, 0);
+  EXPECT_EQ(stats_no_interruptions.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_no_interruptions.lifetime.interruption_count, 0);
+
+  EXPECT_GT(stats_one_interruption.network.expand_rate, 0);
+  EXPECT_EQ(stats_one_interruption.lifetime.total_interruption_duration_ms,
+            5000);
+  EXPECT_EQ(stats_one_interruption.lifetime.interruption_count, 1);
+
+  EXPECT_GT(stats_two_interruptions.network.expand_rate, 0);
+  EXPECT_EQ(stats_two_interruptions.lifetime.total_interruption_duration_ms,
+            5000);
+  EXPECT_EQ(stats_two_interruptions.lifetime.interruption_count, 2);
+}
+
+// Checks that small losses do not produce interruptions.
+TEST(NetEqDecoderPlc, NoInterruptionsInSmallLosses) {
+  std::string checksum_1;
+  std::string checksum_4;
+
+  auto stats_1 = RunTest(300, 1, &checksum_1);
+  auto stats_4 = RunTest(300, 4, &checksum_4);
+
+  EXPECT_EQ(checksum_1, checksum_4);
+
+  EXPECT_EQ(stats_1.lifetime.interruption_count, 0);
+  EXPECT_EQ(stats_1.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_1.lifetime.concealed_samples, 640u);  // 20ms of concealment.
+  EXPECT_EQ(stats_1.lifetime.concealment_events, 1u);   // in just one event.
+
+  EXPECT_EQ(stats_4.lifetime.interruption_count, 0);
+  EXPECT_EQ(stats_4.lifetime.total_interruption_duration_ms, 0);
+  EXPECT_EQ(stats_4.lifetime.concealed_samples, 2560u);  // 80ms of concealment.
+  EXPECT_EQ(stats_4.lifetime.concealment_events, 1u);    // in just one event.
+}
+
+// Checks that interruptions of different sizes report correct duration.
+TEST(NetEqDecoderPlc, InterruptionsReportCorrectSize) {
+  std::string checksum;
+
+  for (int burst_length = 5; burst_length < 10; burst_length++) {
+    auto stats = RunTest(300, burst_length, &checksum);
+    auto duration = stats.lifetime.total_interruption_duration_ms;
+    if (burst_length < 8) {
+      EXPECT_EQ(duration, 0);
+    } else {
+      EXPECT_EQ(duration, burst_length * 20);
+    }
+  }
 }

 }  // namespace test
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@ -1214,6 +1214,11 @@ int NetEqImpl::GetDecision(Operation* operation,
  }

  controller_->ExpandDecision(*operation);
+  if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) {
+    // Getting out of the PLC expand mode, reporting interruptions.
+    // NetEq PLC reports this metrics in expand.cc
+    stats_->EndExpandEvent(fs_hz_);
+  }

  // Check conditions for reset.
  if (new_codec_ || *operation == Operation::kUndefined) {
@ -2159,7 +2164,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
                               expand_->overlap_length());

  normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
-                           expand_.get()));
+                           expand_.get(), stats_.get()));
  accelerate_.reset(
      accelerate_factory_->Create(fs_hz, channels, *background_noise_));
  preemptive_expand_.reset(preemptive_expand_factory_->Create(
--- a/modules/audio_coding/neteq/normal.cc
+++ b/modules/audio_coding/neteq/normal.cc
@ -14,7 +14,6 @@

 #include <algorithm>  // min

-#include "api/audio_codecs/audio_decoder.h"
 #include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "modules/audio_coding/neteq/audio_multi_vector.h"
 #include "modules/audio_coding/neteq/background_noise.h"
@ -50,6 +49,13 @@ int Normal::Process(const int16_t* input,
  // TODO(hlundin): Investigate this further.
  const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);

+  // If last call resulted in a CodedPlc we don't need to do cross-fading but we
+  // need to report the end of the interruption once we are back to normal
+  // operation.
+  if (last_mode == NetEq::Mode::kCodecPlc) {
+    statistics_->EndExpandEvent(fs_hz_);
+  }
+
  // Check if last RecOut call resulted in an Expand. If so, we have to take
  // care of some cross-fading and unmuting.
  if (last_mode == NetEq::Mode::kExpand) {
--- a/modules/audio_coding/neteq/normal.h
+++ b/modules/audio_coding/neteq/normal.h
@ -15,6 +15,7 @@
 #include <string.h>  // Access to size_t.

 #include "api/neteq/neteq.h"
+#include "modules/audio_coding/neteq/statistics_calculator.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/constructor_magic.h"
 #include "rtc_base/numerics/safe_conversions.h"
@ -35,14 +36,16 @@ class Normal {
  Normal(int fs_hz,
         DecoderDatabase* decoder_database,
         const BackgroundNoise& background_noise,
-         Expand* expand)
+         Expand* expand,
+         StatisticsCalculator* statistics)
      : fs_hz_(fs_hz),
        decoder_database_(decoder_database),
        background_noise_(background_noise),
        expand_(expand),
        samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)),
        default_win_slope_Q14_(
-            rtc::dchecked_cast<uint16_t>((1 << 14) / samples_per_ms_)) {}
+            rtc::dchecked_cast<uint16_t>((1 << 14) / samples_per_ms_)),
+        statistics_(statistics) {}

  virtual ~Normal() {}

@ -64,6 +67,7 @@ class Normal {
  Expand* expand_;
  const size_t samples_per_ms_;
  const int16_t default_win_slope_Q14_;
+  StatisticsCalculator* const statistics_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Normal);
 };
--- a/modules/audio_coding/neteq/normal_unittest.cc
+++ b/modules/audio_coding/neteq/normal_unittest.cc
@ -50,7 +50,7 @@ TEST(Normal, CreateAndDestroy) {
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
-  Normal normal(fs, &db, bgn, &expand);
+  Normal normal(fs, &db, bgn, &expand, &statistics);
  EXPECT_CALL(db, Die());  // Called when |db| goes out of scope.
 }

@ -64,7 +64,7 @@ TEST(Normal, AvoidDivideByZero) {
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
                    channels);
-  Normal normal(fs, &db, bgn, &expand);
+  Normal normal(fs, &db, bgn, &expand, &statistics);

  int16_t input[1000] = {0};
  AudioMultiVector output(channels);
@ -99,7 +99,7 @@ TEST(Normal, InputLengthAndChannelsDoNotMatch) {
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
                    channels);
-  Normal normal(fs, &db, bgn, &expand);
+  Normal normal(fs, &db, bgn, &expand, &statistics);

  int16_t input[1000] = {0};
  AudioMultiVector output(channels);
@ -124,7 +124,7 @@ TEST(Normal, LastModeExpand120msPacket) {
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
                    kChannels);
-  Normal normal(kFs, &db, bgn, &expand);
+  Normal normal(kFs, &db, bgn, &expand, &statistics);

  int16_t input[kPacketsizeBytes] = {0};
  AudioMultiVector output(kChannels);