Move AudioFrame to its own header file and target in api/.
This breaks the dependency api:audio_mixer_api --> modules:module_api, and allows peerconnectioninterface.h to include audio_mixer.h, without introducing a dependency cycle. In addition, un-inline all AudioFrame methods, moving implementations to audio_frame.cc, and replace assert by RTC_CHECK_*. Bug: webrtc:7504 Change-Id: I11e3d3d22716e9b98976bf830103fbb06e7bbb77 Reviewed-on: https://webrtc-review.googlesource.com/51860 Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org> Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Commit-Queue: Niels Moller <nisse@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22016}
This commit is contained in:
18
api/BUILD.gn
18
api/BUILD.gn
@ -92,6 +92,7 @@ rtc_static_library("libjingle_peerconnection_api") {
|
|||||||
|
|
||||||
deps = [
|
deps = [
|
||||||
":array_view",
|
":array_view",
|
||||||
|
":audio_mixer_api",
|
||||||
":audio_options_api",
|
":audio_options_api",
|
||||||
":optional",
|
":optional",
|
||||||
":peerconnection_and_implicit_call_api",
|
":peerconnection_and_implicit_call_api",
|
||||||
@ -188,6 +189,21 @@ rtc_source_set("rtc_stats_api") {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Standalone target for webrtc::AudioFrame (api/audio/audio_frame.{h,cc}).
# Kept separate so that targets such as audio_mixer_api can use AudioFrame
# without depending on modules:module_api.
rtc_source_set("audio_frame_api") {
|
||||||
|
visibility = [ "*" ]
|
||||||
|
sources = [
|
||||||
|
"audio/audio_frame.cc",
|
||||||
|
"audio/audio_frame.h",
|
||||||
|
]
|
||||||
|
|
||||||
|
deps = [
|
||||||
|
"../:typedefs",
|
||||||
|
"../rtc_base:checks",
|
||||||
|
"../rtc_base:deprecation",
|
||||||
|
"../rtc_base:rtc_base_approved",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
rtc_source_set("audio_mixer_api") {
|
rtc_source_set("audio_mixer_api") {
|
||||||
visibility = [ "*" ]
|
visibility = [ "*" ]
|
||||||
sources = [
|
sources = [
|
||||||
@ -195,7 +211,7 @@ rtc_source_set("audio_mixer_api") {
|
|||||||
]
|
]
|
||||||
|
|
||||||
deps = [
|
deps = [
|
||||||
"../modules:module_api",
|
":audio_frame_api",
|
||||||
"../rtc_base:rtc_base_approved",
|
"../rtc_base:rtc_base_approved",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
183
api/audio/audio_frame.cc
Normal file
183
api/audio/audio_frame.cc
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "api/audio/audio_frame.h"
|
||||||
|
|
||||||
|
#include "rtc_base/checks.h"
|
||||||
|
#include "rtc_base/numerics/safe_conversions.h"
|
||||||
|
#include "rtc_base/timeutils.h"
|
||||||
|
|
||||||
|
namespace webrtc {
|
||||||
|
|
||||||
|
// Default constructor. All state comes from the in-class member initializers;
// the body only hosts a compile-time consistency check.
AudioFrame::AudioFrame() {
  // The sample buffer must occupy exactly kMaxDataSizeBytes. The check lives
  // here because Visual Studio rejects it inside the class definition.
  static_assert(kMaxDataSizeBytes == sizeof(data_), "kMaxDataSizeBytes");
}
|
||||||
|
|
||||||
|
// Restores the default state: the frame is marked muted and all metadata is
// reset by ResetWithoutMuting(). The sample buffer itself is not touched.
void AudioFrame::Reset() {
  muted_ = true;
  ResetWithoutMuting();
}
|
||||||
|
|
||||||
|
// Resets every metadata field to its default value while leaving |muted_| and
// the sample buffer as they are.
void AudioFrame::ResetWithoutMuting() {
  // Timing information.
  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
  // to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = ntp_time_ms_ = -1;
  profile_timestamp_ms_ = 0;

  // Audio format.
  samples_per_channel_ = num_channels_ = 0;
  sample_rate_hz_ = 0;

  // Classification of the content.
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
}
|
||||||
|
|
||||||
|
void AudioFrame::UpdateFrame(uint32_t timestamp,
|
||||||
|
const int16_t* data,
|
||||||
|
size_t samples_per_channel,
|
||||||
|
int sample_rate_hz,
|
||||||
|
SpeechType speech_type,
|
||||||
|
VADActivity vad_activity,
|
||||||
|
size_t num_channels) {
|
||||||
|
timestamp_ = timestamp;
|
||||||
|
samples_per_channel_ = samples_per_channel;
|
||||||
|
sample_rate_hz_ = sample_rate_hz;
|
||||||
|
speech_type_ = speech_type;
|
||||||
|
vad_activity_ = vad_activity;
|
||||||
|
num_channels_ = num_channels;
|
||||||
|
|
||||||
|
const size_t length = samples_per_channel * num_channels;
|
||||||
|
RTC_CHECK_LE(length, kMaxDataSizeSamples);
|
||||||
|
if (data != nullptr) {
|
||||||
|
memcpy(data_, data, sizeof(int16_t) * length);
|
||||||
|
muted_ = false;
|
||||||
|
} else {
|
||||||
|
muted_ = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AudioFrame::CopyFrom(const AudioFrame& src) {
|
||||||
|
if (this == &src) return;
|
||||||
|
|
||||||
|
timestamp_ = src.timestamp_;
|
||||||
|
elapsed_time_ms_ = src.elapsed_time_ms_;
|
||||||
|
ntp_time_ms_ = src.ntp_time_ms_;
|
||||||
|
muted_ = src.muted();
|
||||||
|
samples_per_channel_ = src.samples_per_channel_;
|
||||||
|
sample_rate_hz_ = src.sample_rate_hz_;
|
||||||
|
speech_type_ = src.speech_type_;
|
||||||
|
vad_activity_ = src.vad_activity_;
|
||||||
|
num_channels_ = src.num_channels_;
|
||||||
|
|
||||||
|
const size_t length = samples_per_channel_ * num_channels_;
|
||||||
|
RTC_CHECK_LE(length, kMaxDataSizeSamples);
|
||||||
|
if (!src.muted()) {
|
||||||
|
memcpy(data_, src.data(), sizeof(int16_t) * length);
|
||||||
|
muted_ = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores the current rtc::TimeMillis() value as the profiling start point that
// ElapsedProfileTimeMs() measures from.
void AudioFrame::UpdateProfileTimeStamp() {
  const int64_t now_ms = rtc::TimeMillis();
  profile_timestamp_ms_ = now_ms;
}
|
||||||
|
|
||||||
|
int64_t AudioFrame::ElapsedProfileTimeMs() const {
|
||||||
|
if (profile_timestamp_ms_ == 0) {
|
||||||
|
// Profiling has not been activated.
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return rtc::TimeSince(profile_timestamp_ms_);
|
||||||
|
}
|
||||||
|
|
||||||
|
const int16_t* AudioFrame::data() const {
|
||||||
|
return muted_ ? empty_data() : data_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(henrik.lundin) Can we skip zeroing the buffer?
|
||||||
|
// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
|
||||||
|
int16_t* AudioFrame::mutable_data() {
|
||||||
|
if (muted_) {
|
||||||
|
memset(data_, 0, kMaxDataSizeBytes);
|
||||||
|
muted_ = false;
|
||||||
|
}
|
||||||
|
return data_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flags the frame as muted. The buffer is left as is; data() hides it while
// the flag is set.
void AudioFrame::Mute() { muted_ = true; }
|
||||||
|
|
||||||
|
// Reports whether the frame is currently flagged as muted.
bool AudioFrame::muted() const {
  return muted_;
}
|
||||||
|
|
||||||
|
// Deprecated. Right-shifts every valid sample by |rhs| bits in place and
// returns *this.
//
// Crashes (RTC_CHECK) unless the frame is mono or stereo. A muted frame is
// returned unchanged. |rhs| is passed straight to the built-in shift, so it
// must be a valid shift count for int (non-negative and smaller than the
// width of int); anything else is undefined behaviour.
AudioFrame& AudioFrame::operator>>=(const int rhs) {
  // RTC_CHECK_* is always enabled, so no separate runtime fallback for an
  // invalid channel count is needed after these two lines.
  RTC_CHECK_GT(num_channels_, 0);
  RTC_CHECK_LT(num_channels_, 3);
  if (muted_) return *this;

  const size_t length = samples_per_channel_ * num_channels_;
  for (size_t i = 0; i < length; i++) {
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
  }
  return *this;
}
|
||||||
|
|
||||||
|
// Deprecated. Mixes |rhs| into this frame with saturation and returns *this.
//
// Preconditions / early exits:
// - Crashes (RTC_CHECK) unless this frame is mono or stereo.
// - Returns unchanged if the channel counts differ.
// - Returns unchanged if the per-channel sample counts differ, except when
//   this frame has 0 samples: it then adopts |rhs|'s sample count.
//
// Metadata merge:
// - |vad_activity_| becomes kVadActive if either side is active, otherwise
//   kVadUnknown if either side is unknown, otherwise it is left as is.
// - |speech_type_| becomes kUndefined when the two sides disagree.
//
// Samples:
// - A muted |rhs| contributes silence.
// - If this frame had no previous data (muted, or 0 samples), |rhs|'s samples
//   are copied; otherwise they are added sample by sample, saturating to the
//   int16_t range.
// - Crashes (RTC_CHECK) before touching the buffer if the sample count
//   exceeds kMaxDataSizeSamples.
AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
  // Sanity check. RTC_CHECK_* is always enabled, so no separate runtime
  // fallback for an invalid channel count is needed.
  RTC_CHECK_GT(num_channels_, 0);
  RTC_CHECK_LT(num_channels_, 3);
  if (num_channels_ != rhs.num_channels_) return *this;

  bool noPrevData = muted_;
  if (samples_per_channel_ != rhs.samples_per_channel_) {
    if (samples_per_channel_ != 0) return *this;
    // Special case: we have no data to start with, so take over rhs's length.
    samples_per_channel_ = rhs.samples_per_channel_;
    noPrevData = true;
  }

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }

  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

  // At this point both frames have the same sample count.
  const size_t length = samples_per_channel_ * num_channels_;

  if (!rhs.muted()) {
    // Same capacity guard as UpdateFrame() and CopyFrom().
    RTC_CHECK_LE(length, kMaxDataSizeSamples);
    muted_ = false;
    if (noPrevData) {
      memcpy(data_, rhs.data(), sizeof(int16_t) * length);
    } else {
      // IMPROVEMENT this can be done very fast in assembly
      for (size_t i = 0; i < length; i++) {
        int32_t wrap_guard =
            static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
        data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
      }
    }
  } else if (noPrevData && !muted_) {
    // This frame was unmuted with 0 samples and has just adopted the length
    // of a muted rhs. Nothing was ever written for those samples, so store
    // the silence explicitly instead of exposing stale buffer contents.
    RTC_CHECK_LE(length, kMaxDataSizeSamples);
    memset(data_, 0, sizeof(int16_t) * length);
  }

  return *this;
}
|
||||||
|
|
||||||
|
// static
|
||||||
|
const int16_t* AudioFrame::empty_data() {
|
||||||
|
static const int16_t kEmptyData[kMaxDataSizeSamples] = {0};
|
||||||
|
static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
|
||||||
|
return kEmptyData;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace webrtc
|
||||||
152
api/audio/audio_frame.h
Normal file
152
api/audio/audio_frame.h
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef API_AUDIO_AUDIO_FRAME_H_
|
||||||
|
#define API_AUDIO_AUDIO_FRAME_H_
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "rtc_base/constructormagic.h"
|
||||||
|
#include "rtc_base/deprecation.h"
|
||||||
|
#include "typedefs.h" // NOLINT(build/include)
|
||||||
|
|
||||||
|
namespace webrtc {
|
||||||
|
|
||||||
|
/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
|
||||||
|
* allows for adding and subtracting frames while keeping track of the resulting
|
||||||
|
* states.
|
||||||
|
*
|
||||||
|
* Notes
|
||||||
|
* - This is a de-facto api, not designed for external use. The AudioFrame class
|
||||||
|
* is in need of overhaul or even replacement, and anyone depending on it
|
||||||
|
* should be prepared for that.
|
||||||
|
* - The total number of samples is samples_per_channel_ * num_channels_.
|
||||||
|
* - Stereo data is interleaved starting with the left channel.
|
||||||
|
*/
|
||||||
|
class AudioFrame {
|
||||||
|
public:
|
||||||
|
// Using constexpr here causes linker errors unless the variable also has an
|
||||||
|
// out-of-class definition, which is impractical in this header-only class.
|
||||||
|
// (This makes no sense because it compiles as an enum value, which we most
|
||||||
|
// certainly cannot take the address of, just fine.) C++17 introduces inline
|
||||||
|
// variables which should allow us to switch to constexpr and keep this a
|
||||||
|
// header-only class.
|
||||||
|
enum : size_t {
|
||||||
|
// Stereo, 32 kHz, 60 ms (2 * 32 * 60)
|
||||||
|
kMaxDataSizeSamples = 3840,
|
||||||
|
kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),
|
||||||
|
};
|
||||||
|
|
||||||
|
enum VADActivity {
|
||||||
|
kVadActive = 0,
|
||||||
|
kVadPassive = 1,
|
||||||
|
kVadUnknown = 2
|
||||||
|
};
|
||||||
|
enum SpeechType {
|
||||||
|
kNormalSpeech = 0,
|
||||||
|
kPLC = 1,
|
||||||
|
kCNG = 2,
|
||||||
|
kPLCCNG = 3,
|
||||||
|
kUndefined = 4
|
||||||
|
};
|
||||||
|
|
||||||
|
AudioFrame();
|
||||||
|
|
||||||
|
// Resets all members to their default state.
|
||||||
|
void Reset();
|
||||||
|
// Same as Reset(), but leaves mute state unchanged. Muting a frame requires
|
||||||
|
// the buffer to be zeroed on the next call to mutable_data(). Callers
|
||||||
|
// intending to write to the buffer immediately after Reset() can instead use
|
||||||
|
// ResetWithoutMuting() to skip this wasteful zeroing.
|
||||||
|
void ResetWithoutMuting();
|
||||||
|
|
||||||
|
// TODO(solenberg): Remove once downstream users of AudioFrame have updated.
|
||||||
|
RTC_DEPRECATED
|
||||||
|
void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
|
||||||
|
size_t samples_per_channel, int sample_rate_hz,
|
||||||
|
SpeechType speech_type, VADActivity vad_activity,
|
||||||
|
size_t num_channels = 1) {
|
||||||
|
RTC_UNUSED(id);
|
||||||
|
UpdateFrame(timestamp, data, samples_per_channel, sample_rate_hz,
|
||||||
|
speech_type, vad_activity, num_channels);
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateFrame(uint32_t timestamp, const int16_t* data,
|
||||||
|
size_t samples_per_channel, int sample_rate_hz,
|
||||||
|
SpeechType speech_type, VADActivity vad_activity,
|
||||||
|
size_t num_channels = 1);
|
||||||
|
|
||||||
|
void CopyFrom(const AudioFrame& src);
|
||||||
|
|
||||||
|
// Sets a wall-time clock timestamp in milliseconds to be used for profiling
|
||||||
|
// of time between two points in the audio chain.
|
||||||
|
// Example:
|
||||||
|
// t0: UpdateProfileTimeStamp()
|
||||||
|
// t1: ElapsedProfileTimeMs() => t1 - t0 [msec]
|
||||||
|
void UpdateProfileTimeStamp();
|
||||||
|
// Returns the time difference between now and when UpdateProfileTimeStamp()
|
||||||
|
// was last called. Returns -1 if UpdateProfileTimeStamp() has not yet been
|
||||||
|
// called.
|
||||||
|
int64_t ElapsedProfileTimeMs() const;
|
||||||
|
|
||||||
|
// data() returns a zeroed static buffer if the frame is muted.
|
||||||
|
// mutable_frame() always returns a non-static buffer; the first call to
|
||||||
|
// mutable_frame() zeros the non-static buffer and marks the frame unmuted.
|
||||||
|
const int16_t* data() const;
|
||||||
|
int16_t* mutable_data();
|
||||||
|
|
||||||
|
// Prefer to mute frames using AudioFrameOperations::Mute.
|
||||||
|
void Mute();
|
||||||
|
// Frame is muted by default.
|
||||||
|
bool muted() const;
|
||||||
|
|
||||||
|
// These methods are deprecated. Use the functions in
|
||||||
|
// webrtc/audio/utility instead. These methods will exists for a
|
||||||
|
// short period of time until webrtc clients have updated. See
|
||||||
|
// webrtc:6548 for details.
|
||||||
|
RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);
|
||||||
|
RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);
|
||||||
|
|
||||||
|
// RTP timestamp of the first sample in the AudioFrame.
|
||||||
|
uint32_t timestamp_ = 0;
|
||||||
|
// Time since the first frame in milliseconds.
|
||||||
|
// -1 represents an uninitialized value.
|
||||||
|
int64_t elapsed_time_ms_ = -1;
|
||||||
|
// NTP time of the estimated capture time in local timebase in milliseconds.
|
||||||
|
// -1 represents an uninitialized value.
|
||||||
|
int64_t ntp_time_ms_ = -1;
|
||||||
|
size_t samples_per_channel_ = 0;
|
||||||
|
int sample_rate_hz_ = 0;
|
||||||
|
size_t num_channels_ = 0;
|
||||||
|
SpeechType speech_type_ = kUndefined;
|
||||||
|
VADActivity vad_activity_ = kVadUnknown;
|
||||||
|
// Monotonically increasing timestamp intended for profiling of audio frames.
|
||||||
|
// Typically used for measuring elapsed time between two different points in
|
||||||
|
// the audio path. No lock is used to save resources and we are thread safe
|
||||||
|
// by design. Also, rtc::Optional is not used since it will cause a "complex
|
||||||
|
// class/struct needs an explicit out-of-line destructor" build error.
|
||||||
|
int64_t profile_timestamp_ms_ = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// A permamently zeroed out buffer to represent muted frames. This is a
|
||||||
|
// header-only class, so the only way to avoid creating a separate empty
|
||||||
|
// buffer per translation unit is to wrap a static in an inline function.
|
||||||
|
static const int16_t* empty_data();
|
||||||
|
|
||||||
|
int16_t data_[kMaxDataSizeSamples];
|
||||||
|
bool muted_ = true;
|
||||||
|
|
||||||
|
RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace webrtc
|
||||||
|
|
||||||
|
#endif // API_AUDIO_AUDIO_FRAME_H_
|
||||||
@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include "modules/include/module_common_types.h"
|
#include "api/audio/audio_frame.h"
|
||||||
#include "rtc_base/refcount.h"
|
#include "rtc_base/refcount.h"
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|||||||
@ -76,6 +76,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "api/audio/audio_mixer.h"
|
||||||
#include "api/audio_codecs/audio_decoder_factory.h"
|
#include "api/audio_codecs/audio_decoder_factory.h"
|
||||||
#include "api/audio_codecs/audio_encoder_factory.h"
|
#include "api/audio_codecs/audio_encoder_factory.h"
|
||||||
#include "api/audio_options.h"
|
#include "api/audio_options.h"
|
||||||
|
|||||||
@ -51,6 +51,7 @@ rtc_source_set("module_api") {
|
|||||||
":module_api_public",
|
":module_api_public",
|
||||||
"..:webrtc_common",
|
"..:webrtc_common",
|
||||||
"../:typedefs",
|
"../:typedefs",
|
||||||
|
"../api:audio_frame_api",
|
||||||
"../api:libjingle_peerconnection_api",
|
"../api:libjingle_peerconnection_api",
|
||||||
"../api:optional",
|
"../api:optional",
|
||||||
"../api:video_frame_api",
|
"../api:video_frame_api",
|
||||||
|
|||||||
@ -18,6 +18,9 @@
|
|||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include "api/optional.h"
|
#include "api/optional.h"
|
||||||
|
// TODO(bugs.webrtc.org/7504): Included here because users of this header expect
|
||||||
|
// it to declare AudioFrame. Delete as soon as all known users are updated.
|
||||||
|
#include "api/audio/audio_frame.h"
|
||||||
#include "api/rtp_headers.h"
|
#include "api/rtp_headers.h"
|
||||||
#include "api/video/video_rotation.h"
|
#include "api/video/video_rotation.h"
|
||||||
#include "common_types.h" // NOLINT(build/include)
|
#include "common_types.h" // NOLINT(build/include)
|
||||||
@ -288,289 +291,6 @@ class CallStatsObserver {
|
|||||||
virtual ~CallStatsObserver() {}
|
virtual ~CallStatsObserver() {}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
|
|
||||||
* allows for adding and subtracting frames while keeping track of the resulting
|
|
||||||
* states.
|
|
||||||
*
|
|
||||||
* Notes
|
|
||||||
* - The total number of samples is samples_per_channel_ * num_channels_
|
|
||||||
* - Stereo data is interleaved starting with the left channel.
|
|
||||||
*/
|
|
||||||
class AudioFrame {
|
|
||||||
public:
|
|
||||||
// Using constexpr here causes linker errors unless the variable also has an
|
|
||||||
// out-of-class definition, which is impractical in this header-only class.
|
|
||||||
// (This makes no sense because it compiles as an enum value, which we most
|
|
||||||
// certainly cannot take the address of, just fine.) C++17 introduces inline
|
|
||||||
// variables which should allow us to switch to constexpr and keep this a
|
|
||||||
// header-only class.
|
|
||||||
enum : size_t {
|
|
||||||
// Stereo, 32 kHz, 60 ms (2 * 32 * 60)
|
|
||||||
kMaxDataSizeSamples = 3840,
|
|
||||||
kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),
|
|
||||||
};
|
|
||||||
|
|
||||||
enum VADActivity {
|
|
||||||
kVadActive = 0,
|
|
||||||
kVadPassive = 1,
|
|
||||||
kVadUnknown = 2
|
|
||||||
};
|
|
||||||
enum SpeechType {
|
|
||||||
kNormalSpeech = 0,
|
|
||||||
kPLC = 1,
|
|
||||||
kCNG = 2,
|
|
||||||
kPLCCNG = 3,
|
|
||||||
kUndefined = 4
|
|
||||||
};
|
|
||||||
|
|
||||||
AudioFrame();
|
|
||||||
|
|
||||||
// Resets all members to their default state.
|
|
||||||
void Reset();
|
|
||||||
// Same as Reset(), but leaves mute state unchanged. Muting a frame requires
|
|
||||||
// the buffer to be zeroed on the next call to mutable_data(). Callers
|
|
||||||
// intending to write to the buffer immediately after Reset() can instead use
|
|
||||||
// ResetWithoutMuting() to skip this wasteful zeroing.
|
|
||||||
void ResetWithoutMuting();
|
|
||||||
|
|
||||||
// TODO(solenberg): Remove once downstream users of AudioFrame have updated.
|
|
||||||
RTC_DEPRECATED
|
|
||||||
void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
|
|
||||||
size_t samples_per_channel, int sample_rate_hz,
|
|
||||||
SpeechType speech_type, VADActivity vad_activity,
|
|
||||||
size_t num_channels = 1) {
|
|
||||||
RTC_UNUSED(id);
|
|
||||||
UpdateFrame(timestamp, data, samples_per_channel, sample_rate_hz,
|
|
||||||
speech_type, vad_activity, num_channels);
|
|
||||||
}
|
|
||||||
|
|
||||||
void UpdateFrame(uint32_t timestamp, const int16_t* data,
|
|
||||||
size_t samples_per_channel, int sample_rate_hz,
|
|
||||||
SpeechType speech_type, VADActivity vad_activity,
|
|
||||||
size_t num_channels = 1);
|
|
||||||
|
|
||||||
void CopyFrom(const AudioFrame& src);
|
|
||||||
|
|
||||||
// Sets a wall-time clock timestamp in milliseconds to be used for profiling
|
|
||||||
// of time between two points in the audio chain.
|
|
||||||
// Example:
|
|
||||||
// t0: UpdateProfileTimeStamp()
|
|
||||||
// t1: ElapsedProfileTimeMs() => t1 - t0 [msec]
|
|
||||||
void UpdateProfileTimeStamp();
|
|
||||||
// Returns the time difference between now and when UpdateProfileTimeStamp()
|
|
||||||
// was last called. Returns -1 if UpdateProfileTimeStamp() has not yet been
|
|
||||||
// called.
|
|
||||||
int64_t ElapsedProfileTimeMs() const;
|
|
||||||
|
|
||||||
// data() returns a zeroed static buffer if the frame is muted.
|
|
||||||
// mutable_frame() always returns a non-static buffer; the first call to
|
|
||||||
// mutable_frame() zeros the non-static buffer and marks the frame unmuted.
|
|
||||||
const int16_t* data() const;
|
|
||||||
int16_t* mutable_data();
|
|
||||||
|
|
||||||
// Prefer to mute frames using AudioFrameOperations::Mute.
|
|
||||||
void Mute();
|
|
||||||
// Frame is muted by default.
|
|
||||||
bool muted() const;
|
|
||||||
|
|
||||||
// These methods are deprecated. Use the functions in
|
|
||||||
// webrtc/audio/utility instead. These methods will exists for a
|
|
||||||
// short period of time until webrtc clients have updated. See
|
|
||||||
// webrtc:6548 for details.
|
|
||||||
RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);
|
|
||||||
RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);
|
|
||||||
|
|
||||||
// RTP timestamp of the first sample in the AudioFrame.
|
|
||||||
uint32_t timestamp_ = 0;
|
|
||||||
// Time since the first frame in milliseconds.
|
|
||||||
// -1 represents an uninitialized value.
|
|
||||||
int64_t elapsed_time_ms_ = -1;
|
|
||||||
// NTP time of the estimated capture time in local timebase in milliseconds.
|
|
||||||
// -1 represents an uninitialized value.
|
|
||||||
int64_t ntp_time_ms_ = -1;
|
|
||||||
size_t samples_per_channel_ = 0;
|
|
||||||
int sample_rate_hz_ = 0;
|
|
||||||
size_t num_channels_ = 0;
|
|
||||||
SpeechType speech_type_ = kUndefined;
|
|
||||||
VADActivity vad_activity_ = kVadUnknown;
|
|
||||||
// Monotonically increasing timestamp intended for profiling of audio frames.
|
|
||||||
// Typically used for measuring elapsed time between two different points in
|
|
||||||
// the audio path. No lock is used to save resources and we are thread safe
|
|
||||||
// by design. Also, rtc::Optional is not used since it will cause a "complex
|
|
||||||
// class/struct needs an explicit out-of-line destructor" build error.
|
|
||||||
int64_t profile_timestamp_ms_ = 0;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// A permamently zeroed out buffer to represent muted frames. This is a
|
|
||||||
// header-only class, so the only way to avoid creating a separate empty
|
|
||||||
// buffer per translation unit is to wrap a static in an inline function.
|
|
||||||
static const int16_t* empty_data() {
|
|
||||||
static const int16_t kEmptyData[kMaxDataSizeSamples] = {0};
|
|
||||||
static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
|
|
||||||
return kEmptyData;
|
|
||||||
}
|
|
||||||
|
|
||||||
int16_t data_[kMaxDataSizeSamples];
|
|
||||||
bool muted_ = true;
|
|
||||||
|
|
||||||
RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
|
|
||||||
};
|
|
||||||
|
|
||||||
inline AudioFrame::AudioFrame() {
|
|
||||||
// Visual Studio doesn't like this in the class definition.
|
|
||||||
static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::Reset() {
|
|
||||||
ResetWithoutMuting();
|
|
||||||
muted_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::ResetWithoutMuting() {
|
|
||||||
// TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
|
|
||||||
// to an invalid value, or add a new member to indicate invalidity.
|
|
||||||
timestamp_ = 0;
|
|
||||||
elapsed_time_ms_ = -1;
|
|
||||||
ntp_time_ms_ = -1;
|
|
||||||
samples_per_channel_ = 0;
|
|
||||||
sample_rate_hz_ = 0;
|
|
||||||
num_channels_ = 0;
|
|
||||||
speech_type_ = kUndefined;
|
|
||||||
vad_activity_ = kVadUnknown;
|
|
||||||
profile_timestamp_ms_ = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::UpdateFrame(uint32_t timestamp,
|
|
||||||
const int16_t* data,
|
|
||||||
size_t samples_per_channel,
|
|
||||||
int sample_rate_hz,
|
|
||||||
SpeechType speech_type,
|
|
||||||
VADActivity vad_activity,
|
|
||||||
size_t num_channels) {
|
|
||||||
timestamp_ = timestamp;
|
|
||||||
samples_per_channel_ = samples_per_channel;
|
|
||||||
sample_rate_hz_ = sample_rate_hz;
|
|
||||||
speech_type_ = speech_type;
|
|
||||||
vad_activity_ = vad_activity;
|
|
||||||
num_channels_ = num_channels;
|
|
||||||
|
|
||||||
const size_t length = samples_per_channel * num_channels;
|
|
||||||
assert(length <= kMaxDataSizeSamples);
|
|
||||||
if (data != nullptr) {
|
|
||||||
memcpy(data_, data, sizeof(int16_t) * length);
|
|
||||||
muted_ = false;
|
|
||||||
} else {
|
|
||||||
muted_ = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::CopyFrom(const AudioFrame& src) {
|
|
||||||
if (this == &src) return;
|
|
||||||
|
|
||||||
timestamp_ = src.timestamp_;
|
|
||||||
elapsed_time_ms_ = src.elapsed_time_ms_;
|
|
||||||
ntp_time_ms_ = src.ntp_time_ms_;
|
|
||||||
muted_ = src.muted();
|
|
||||||
samples_per_channel_ = src.samples_per_channel_;
|
|
||||||
sample_rate_hz_ = src.sample_rate_hz_;
|
|
||||||
speech_type_ = src.speech_type_;
|
|
||||||
vad_activity_ = src.vad_activity_;
|
|
||||||
num_channels_ = src.num_channels_;
|
|
||||||
|
|
||||||
const size_t length = samples_per_channel_ * num_channels_;
|
|
||||||
assert(length <= kMaxDataSizeSamples);
|
|
||||||
if (!src.muted()) {
|
|
||||||
memcpy(data_, src.data(), sizeof(int16_t) * length);
|
|
||||||
muted_ = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::UpdateProfileTimeStamp() {
|
|
||||||
profile_timestamp_ms_ = rtc::TimeMillis();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int64_t AudioFrame::ElapsedProfileTimeMs() const {
|
|
||||||
if (profile_timestamp_ms_ == 0) {
|
|
||||||
// Profiling has not been activated.
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
return rtc::TimeSince(profile_timestamp_ms_);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const int16_t* AudioFrame::data() const {
|
|
||||||
return muted_ ? empty_data() : data_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(henrik.lundin) Can we skip zeroing the buffer?
|
|
||||||
// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
|
|
||||||
inline int16_t* AudioFrame::mutable_data() {
|
|
||||||
if (muted_) {
|
|
||||||
memset(data_, 0, kMaxDataSizeBytes);
|
|
||||||
muted_ = false;
|
|
||||||
}
|
|
||||||
return data_;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void AudioFrame::Mute() {
|
|
||||||
muted_ = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool AudioFrame::muted() const { return muted_; }
|
|
||||||
|
|
||||||
inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
|
|
||||||
assert((num_channels_ > 0) && (num_channels_ < 3));
|
|
||||||
if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
|
|
||||||
if (muted_) return *this;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
|
|
||||||
data_[i] = static_cast<int16_t>(data_[i] >> rhs);
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
|
|
||||||
// Sanity check
|
|
||||||
assert((num_channels_ > 0) && (num_channels_ < 3));
|
|
||||||
if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
|
|
||||||
if (num_channels_ != rhs.num_channels_) return *this;
|
|
||||||
|
|
||||||
bool noPrevData = muted_;
|
|
||||||
if (samples_per_channel_ != rhs.samples_per_channel_) {
|
|
||||||
if (samples_per_channel_ == 0) {
|
|
||||||
// special case we have no data to start with
|
|
||||||
samples_per_channel_ = rhs.samples_per_channel_;
|
|
||||||
noPrevData = true;
|
|
||||||
} else {
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
|
|
||||||
vad_activity_ = kVadActive;
|
|
||||||
} else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
|
|
||||||
vad_activity_ = kVadUnknown;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
|
|
||||||
|
|
||||||
if (!rhs.muted()) {
|
|
||||||
muted_ = false;
|
|
||||||
if (noPrevData) {
|
|
||||||
memcpy(data_, rhs.data(),
|
|
||||||
sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
|
|
||||||
} else {
|
|
||||||
// IMPROVEMENT this can be done very fast in assembly
|
|
||||||
for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
|
|
||||||
int32_t wrap_guard =
|
|
||||||
static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
|
|
||||||
data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct PacedPacketInfo {
|
struct PacedPacketInfo {
|
||||||
PacedPacketInfo() {}
|
PacedPacketInfo() {}
|
||||||
PacedPacketInfo(int probe_cluster_id,
|
PacedPacketInfo(int probe_cluster_id,
|
||||||
|
|||||||
Reference in New Issue
Block a user