Added a ParsePayload method to AudioDecoder.

ParsePayload allows the decoder to split the input into usable chunks before they are put into NetEq's PacketBuffer. Eventually, all packet splitting will move into ParsePayload. For now there is a base implementation of ParsePayload that generates a single Frame, which calls the underlying AudioDecoder to get its Duration() and to Decode. BUG=webrtc:5805 BUG=chromium:428099 Review-Url: https://codereview.webrtc.org/2326953003 Cr-Commit-Position: refs/heads/master@{#14300}
2016-09-20 01:38:00 -07:00
parent 02bd5125e9
commit 61a208b1b8
8 changed files with 268 additions and 115 deletions
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.cc
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.cc
@ -12,6 +12,8 @@

 #include <assert.h>

+#include <utility>
+
 #include "webrtc/base/array_view.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/sanitizer.h"
@ -19,6 +21,76 @@

 namespace webrtc {

+namespace {
+// Adapter that presents one whole, unsplit payload as an EncodedAudioFrame by
+// forwarding to the packet-based methods of an AudioDecoder. The primary flag
+// selects between the regular and the *Redundant decoder entry points.
+class LegacyFrame final : public AudioDecoder::EncodedAudioFrame {
+ public:
+  // Moves |payload| into the frame. |decoder| is kept as a raw pointer and is
+  // not owned by the frame.
+  LegacyFrame(AudioDecoder* decoder,
+              rtc::Buffer&& payload,
+              bool is_primary_payload)
+      : decoder_(decoder),
+        payload_(std::move(payload)),
+        is_primary_payload_(is_primary_payload) {}
+
+  // Asks the decoder for the duration of the stored payload. A negative
+  // answer from the decoder is reported as zero.
+  size_t Duration() const override {
+    const int duration =
+        is_primary_payload_
+            ? decoder_->PacketDuration(payload_.data(), payload_.size())
+            : decoder_->PacketDurationRedundant(payload_.data(),
+                                                payload_.size());
+    if (duration < 0)
+      return 0;
+    return static_cast<size_t>(duration);
+  }
+
+  // Decodes the stored payload into |decoded|. Returns an empty Optional when
+  // the decoder reports a negative value; otherwise returns the sample count
+  // and the speech type reported by the decoder.
+  rtc::Optional<DecodeResult> Decode(
+      rtc::ArrayView<int16_t> decoded) const override {
+    // The decoder interface takes the output capacity in bytes, not samples.
+    const size_t max_decoded_bytes = decoded.size() * sizeof(int16_t);
+    AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
+    const int num_samples =
+        is_primary_payload_
+            ? decoder_->Decode(payload_.data(), payload_.size(),
+                               decoder_->SampleRateHz(), max_decoded_bytes,
+                               decoded.data(), &speech_type)
+            : decoder_->DecodeRedundant(payload_.data(), payload_.size(),
+                                        decoder_->SampleRateHz(),
+                                        max_decoded_bytes, decoded.data(),
+                                        &speech_type);
+
+    if (num_samples < 0)
+      return rtc::Optional<DecodeResult>();
+
+    const DecodeResult result = {static_cast<size_t>(num_samples), speech_type};
+    return rtc::Optional<DecodeResult>(result);
+  }
+
+ private:
+  AudioDecoder* const decoder_;
+  const rtc::Buffer payload_;
+  const bool is_primary_payload_;
+};
+}  // namespace
+
+// Default construction yields an empty result: |frame| is null, and
+// |timestamp| and |primary| are set to fixed values so that reading or moving
+// a default-constructed ParseResult never touches indeterminate scalars.
+AudioDecoder::ParseResult::ParseResult() : timestamp(0), primary(false) {}
+
+// Member-wise move; ownership of |frame| transfers from |b|.
+AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
+
+// Builds a result from its three parts, taking ownership of |frame|.
+AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,
+                                       bool primary,
+                                       std::unique_ptr<EncodedAudioFrame> frame)
+    : timestamp(timestamp), primary(primary), frame(std::move(frame)) {}
+
+AudioDecoder::ParseResult::~ParseResult() = default;
+
+// Member-wise move assignment; ownership of |frame| transfers from |b|.
+AudioDecoder::ParseResult& AudioDecoder::ParseResult::operator=(
+    ParseResult&& b) = default;
+
+// Base implementation: performs no splitting. The whole |payload| is moved
+// into a single LegacyFrame that refers back to this decoder, and that frame
+// is returned as the only entry, tagged with |timestamp| and |is_primary|.
+std::vector<AudioDecoder::ParseResult> AudioDecoder::ParsePayload(
+    rtc::Buffer&& payload,
+    uint32_t timestamp,
+    bool is_primary) {
+  std::vector<ParseResult> parsed;
+  parsed.emplace_back(
+      timestamp, is_primary,
+      std::unique_ptr<EncodedAudioFrame>(
+          new LegacyFrame(this, std::move(payload), is_primary)));
+  return parsed;
+}
+
 int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len,
                         int sample_rate_hz, size_t max_decoded_bytes,
                         int16_t* decoded, SpeechType* speech_type) {
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.h
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h
@ -13,7 +13,13 @@

 #include <stdlib.h>  // NULL

+#include <memory>
+#include <vector>
+
+#include "webrtc/base/array_view.h"
+#include "webrtc/base/buffer.h"
 #include "webrtc/base/constructormagic.h"
+#include "webrtc/base/optional.h"
 #include "webrtc/typedefs.h"

 namespace webrtc {
@ -33,6 +39,55 @@ class AudioDecoder {
  AudioDecoder() = default;
  virtual ~AudioDecoder() = default;

+  // Abstract handle to one chunk of encoded audio. Implementations report the
+  // chunk's duration and decode it on request.
+  class EncodedAudioFrame {
+   public:
+    // Outcome of a successful Decode() call.
+    struct DecodeResult {
+      // Total number of decoded samples across all channels.
+      size_t num_decoded_samples;
+      // Whether the decoder produced comfort noise or speech.
+      SpeechType speech_type;
+    };
+
+    virtual ~EncodedAudioFrame() = default;
+
+    // Returns the duration in samples-per-channel of this audio frame.
+    // If no duration can be ascertained, returns zero.
+    virtual size_t Duration() const = 0;
+
+    // Decodes this frame of audio and writes the result in |decoded|.
+    // |decoded| must be large enough to store as many samples as indicated by a
+    // call to Duration(). On success, returns an rtc::Optional containing the
+    // total number of samples across all channels, as well as whether the
+    // decoder produced comfort noise or speech. On failure, returns an empty
+    // rtc::Optional. Decode may be called at most once per frame object.
+    virtual rtc::Optional<DecodeResult> Decode(
+        rtc::ArrayView<int16_t> decoded) const = 0;
+  };
+
+  // One entry of ParsePayload()'s output: a decodable frame together with the
+  // timestamp and primary flag that belong to it. Only move construction and
+  // move assignment are declared for this type.
+  struct ParseResult {
+    ParseResult();
+    ParseResult(uint32_t timestamp,
+                bool primary,
+                std::unique_ptr<EncodedAudioFrame> frame);
+    ParseResult(ParseResult&& b);
+    ~ParseResult();
+
+    ParseResult& operator=(ParseResult&& b);
+
+    // The timestamp of the frame is in samples per channel.
+    uint32_t timestamp;
+    // Primary flag of the frame. The base ParsePayload() implementation copies
+    // its |is_primary| argument here.
+    bool primary;
+    // The frame to decode; owned by this ParseResult.
+    std::unique_ptr<EncodedAudioFrame> frame;
+  };
+
+  // Let the decoder parse this payload and prepare zero or more decodable
+  // frames. Each frame must be between 10 ms and 120 ms long. The caller must
+  // ensure that the AudioDecoder object outlives any frame objects returned by
+  // this call. The decoder is free to swap or move the data from the |payload|
+  // buffer. |timestamp| is the input timestamp, in samples, corresponding to
+  // the start of the payload. |is_primary| is recorded in each returned
+  // ParseResult; in the base implementation it also selects whether the frame
+  // uses the regular or the redundant decode and duration methods.
+  // The base implementation does no splitting: it returns exactly one frame
+  // that holds the whole payload and calls back into this decoder.
+  virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
+                                                uint32_t timestamp,
+                                                bool is_primary);
+
  // Decodes |encode_len| bytes from |encoded| and writes the result in
  // |decoded|. The maximum bytes allowed to be written into |decoded| is
  // |max_decoded_bytes|. Returns the total number of samples across all