Added a ParsePayload method to AudioDecoder.

It allows the decoder to split the input up into usable chunks before
they are put into NetEq's PacketBuffer. Eventually, all packet splitting
will move into ParsePayload.

There's currently a base implementation of ParsePayload. It generates a
single frame that calls the underlying AudioDecoder both to obtain its
Duration() and to Decode it.

BUG=webrtc:5805
BUG=chromium:428099

Review-Url: https://codereview.webrtc.org/2326953003
Cr-Commit-Position: refs/heads/master@{#14300}
This commit is contained in:
ossu
2016-09-20 01:38:00 -07:00
committed by Commit bot
parent 02bd5125e9
commit 61a208b1b8
8 changed files with 268 additions and 115 deletions

View File

@ -12,6 +12,8 @@
#include <assert.h>
#include <utility>
#include "webrtc/base/array_view.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/sanitizer.h"
@ -19,6 +21,76 @@
namespace webrtc {
namespace {
// An EncodedAudioFrame that keeps a whole payload as one frame and forwards
// duration queries and decoding to the AudioDecoder it was created by.
class LegacyFrame final : public AudioDecoder::EncodedAudioFrame {
 public:
  // |decoder| is held as a raw, non-owned pointer. |payload| is moved into the
  // frame. |is_primary_payload| selects between the regular decoder calls and
  // their *Redundant counterparts.
  LegacyFrame(AudioDecoder* decoder,
              rtc::Buffer&& payload,
              bool is_primary_payload)
      : decoder_(decoder),
        payload_(std::move(payload)),
        is_primary_payload_(is_primary_payload) {}

  // Queries the decoder for the duration of the stored payload. A negative
  // answer from the decoder is reported as zero.
  size_t Duration() const override {
    const int duration =
        is_primary_payload_
            ? decoder_->PacketDuration(payload_.data(), payload_.size())
            : decoder_->PacketDurationRedundant(payload_.data(),
                                                payload_.size());
    if (duration < 0) {
      return 0;
    }
    return static_cast<size_t>(duration);
  }

  // Decodes the stored payload into |decoded|. Returns an empty Optional when
  // the decoder reports a negative result; otherwise returns the decoder's
  // return value together with the speech type it reported.
  rtc::Optional<DecodeResult> Decode(
      rtc::ArrayView<int16_t> decoded) const override {
    const int sample_rate_hz = decoder_->SampleRateHz();
    // The legacy decoder interface takes the output capacity in bytes.
    const size_t max_decoded_bytes = decoded.size() * sizeof(int16_t);
    AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;

    const int num_samples =
        is_primary_payload_
            ? decoder_->Decode(payload_.data(), payload_.size(),
                               sample_rate_hz, max_decoded_bytes,
                               decoded.data(), &speech_type)
            : decoder_->DecodeRedundant(payload_.data(), payload_.size(),
                                        sample_rate_hz, max_decoded_bytes,
                                        decoded.data(), &speech_type);
    if (num_samples < 0) {
      return rtc::Optional<DecodeResult>();
    }

    const DecodeResult result = {static_cast<size_t>(num_samples),
                                 speech_type};
    return rtc::Optional<DecodeResult>(result);
  }

 private:
  AudioDecoder* const decoder_;
  const rtc::Buffer payload_;
  const bool is_primary_payload_;
};
} // namespace
// Out-of-line definitions of ParseResult's special member functions. Apart
// from the three-argument constructor they are all compiler-generated.
AudioDecoder::ParseResult::ParseResult() = default;
// Move constructor: transfers ownership of |b|'s frame to the new object.
AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
// Stores |timestamp| and |primary| and takes ownership of |frame|. The
// parameters shadow the members of the same name; inside the initializer list
// the name before the parentheses is the member and the one inside is the
// parameter.
AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,
bool primary,
std::unique_ptr<EncodedAudioFrame> frame)
: timestamp(timestamp), primary(primary), frame(std::move(frame)) {}
AudioDecoder::ParseResult::~ParseResult() = default;
// Move assignment: replaces this object's fields with those of |b|.
AudioDecoder::ParseResult& AudioDecoder::ParseResult::operator=(
ParseResult&& b) = default;
// Base implementation of ParsePayload(): the whole |payload| is moved into a
// single LegacyFrame that refers back to this decoder, and that frame is
// returned as the only element of the result, tagged with |timestamp| and
// |is_primary|.
std::vector<AudioDecoder::ParseResult> AudioDecoder::ParsePayload(
    rtc::Buffer&& payload,
    uint32_t timestamp,
    bool is_primary) {
  std::unique_ptr<EncodedAudioFrame> legacy_frame(
      new LegacyFrame(this, std::move(payload), is_primary));

  std::vector<ParseResult> parsed;
  parsed.emplace_back(timestamp, is_primary, std::move(legacy_frame));
  return parsed;
}
int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, size_t max_decoded_bytes,
int16_t* decoded, SpeechType* speech_type) {

View File

@ -13,7 +13,13 @@
#include <stdlib.h> // NULL
#include <memory>
#include <vector>
#include "webrtc/base/array_view.h"
#include "webrtc/base/buffer.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
#include "webrtc/typedefs.h"
namespace webrtc {
@ -33,6 +39,55 @@ class AudioDecoder {
// Compiler-generated default constructor.
AudioDecoder() = default;
// Virtual so that a derived decoder can be destroyed through an AudioDecoder
// pointer.
virtual ~AudioDecoder() = default;
// Interface for a single encoded audio frame that can report its duration and
// be decoded into 16-bit samples.
class EncodedAudioFrame {
public:
// Result of a successful Decode() call.
struct DecodeResult {
// Total number of decoded samples across all channels.
size_t num_decoded_samples;
// Whether the decoder produced comfort noise or speech.
SpeechType speech_type;
};
virtual ~EncodedAudioFrame() = default;
// Returns the duration in samples-per-channel of this audio frame.
// If no duration can be ascertained, returns zero.
virtual size_t Duration() const = 0;
// Decodes this frame of audio and writes the result in |decoded|.
// |decoded| must be large enough to store as many samples as indicated by a
// call to Duration(). On success, returns an rtc::Optional containing the
// total number of samples across all channels, as well as whether the
// decoder produced comfort noise or speech. On failure, returns an empty
// rtc::Optional. Decode may be called at most once per frame object.
// NOTE(review): Duration() is per channel while the returned count covers
// all channels, so |decoded| presumably needs room for Duration() times the
// channel count -- confirm against callers.
virtual rtc::Optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const = 0;
};
// One entry of the list returned by ParsePayload(): a decodable frame plus the
// timestamp and primary flag that belong to it. The type declares move
// operations only, so it is movable but not copyable.
struct ParseResult {
  // Creates a result with a zero timestamp, |primary| false and no frame.
  ParseResult();
  // Stores |timestamp| and |primary| and takes ownership of |frame|.
  ParseResult(uint32_t timestamp,
              bool primary,
              std::unique_ptr<EncodedAudioFrame> frame);
  ParseResult(ParseResult&& b);
  ~ParseResult();

  ParseResult& operator=(ParseResult&& b);

  // The timestamp of the frame is in samples per channel.
  // Default member initializers are needed here: the default constructor is
  // defined out of line, so without them a default-constructed ParseResult
  // would hold indeterminate values in its scalar fields.
  uint32_t timestamp = 0;
  // The base ParsePayload() implementation stores its |is_primary| argument
  // in this field.
  bool primary = false;
  // The frame to decode; owned by this ParseResult.
  std::unique_ptr<EncodedAudioFrame> frame;
};
// Lets the decoder parse this payload and prepare zero or more decodable
// frames. Each frame must be between 10 ms and 120 ms long. The caller must
// ensure that the AudioDecoder object outlives any frame objects returned by
// this call. The decoder is free to swap or move the data from the |payload|
// buffer. |timestamp| is the input timestamp, in samples, corresponding to
// the start of the payload. |is_primary| is forwarded to the |primary| field
// of each result by the base implementation, which wraps the entire payload
// in a single frame that decodes through this AudioDecoder.
virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary);
// Decodes |encode_len| bytes from |encoded| and writes the result in
// |decoded|. The maximum bytes allowed to be written into |decoded| is
// |max_decoded_bytes|. Returns the total number of samples across all