Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.

This is the upshot of a recommendation by henrik.lundin and kwiberg on an
original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 )
to stop using int16_t just because values could fit in it, and is similar in
nature to a previous "mass change to use size_t more"
( https://webrtc-codereview.appspot.com/23129004/ ), which also needed to be
split up for review but to land all at once, since, like adding "const", such
changes tend to cause a lot of transitive effects.

This was reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002

The change is being landed as TBR to all the folks who reviewed the above.

BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher

Review URL: https://codereview.webrtc.org/1230503003

Cr-Commit-Position: refs/heads/master@{#9768}
@@ -18,11 +18,11 @@ Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
                                             size_t input_length,
                                             bool fast_accelerate,
                                             AudioMultiVector* output,
-                                            int16_t* length_change_samples) {
+                                            size_t* length_change_samples) {
   // Input length must be (almost) 30 ms.
-  static const int k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
-  if (num_channels_ == 0 || static_cast<int>(input_length) / num_channels_ <
-      (2 * k15ms - 1) * fs_mult_) {
+  static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
+  if (num_channels_ == 0 ||
+      input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) {
     // Length of input data too short to do accelerate. Simply move all data
     // from input to output.
     output->PushBackInterleaved(input, input_length);
@@ -34,7 +34,7 @@ Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
 
 void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/,
                                                int16_t* best_correlation,
-                                               int* /*peak_index*/) const {
+                                               size_t* /*peak_index*/) const {
   // When the signal does not contain any active speech, the correlation does
   // not matter. Simply set it to zero.
   *best_correlation = 0;

@@ -45,14 +45,14 @@ class Accelerate : public TimeStretch {
                       size_t input_length,
                       bool fast_accelerate,
                       AudioMultiVector* output,
-                      int16_t* length_change_samples);
+                      size_t* length_change_samples);
 
  protected:
   // Sets the parameters |best_correlation| and |peak_index| to suitable
   // values when the signal contains no active speech.
   void SetParametersForPassiveSpeech(size_t len,
                                      int16_t* best_correlation,
-                                     int* peak_index) const override;
+                                     size_t* peak_index) const override;
 
   // Checks the criteria for performing the time-stretching operation and,
   // if possible, performs the time-stretching.
@@ -53,10 +53,9 @@ int AudioDecoderPcmU::DecodeInternal(const uint8_t* encoded,
                                      SpeechType* speech_type) {
   DCHECK_EQ(sample_rate_hz, 8000);
   int16_t temp_type = 1;  // Default is speech.
-  int16_t ret = WebRtcG711_DecodeU(encoded, static_cast<int16_t>(encoded_len),
-                                   decoded, &temp_type);
+  size_t ret = WebRtcG711_DecodeU(encoded, encoded_len, decoded, &temp_type);
   *speech_type = ConvertSpeechType(temp_type);
-  return ret;
+  return static_cast<int>(ret);
 }
 
 int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded,
@@ -85,10 +84,9 @@ int AudioDecoderPcmA::DecodeInternal(const uint8_t* encoded,
                                      SpeechType* speech_type) {
   DCHECK_EQ(sample_rate_hz, 8000);
   int16_t temp_type = 1;  // Default is speech.
-  int16_t ret = WebRtcG711_DecodeA(encoded, static_cast<int16_t>(encoded_len),
-                                   decoded, &temp_type);
+  size_t ret = WebRtcG711_DecodeA(encoded, encoded_len, decoded, &temp_type);
   *speech_type = ConvertSpeechType(temp_type);
-  return ret;
+  return static_cast<int>(ret);
 }
 
 int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded,
@@ -120,10 +118,9 @@ int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded,
   DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
          sample_rate_hz == 32000 || sample_rate_hz == 48000)
       << "Unsupported sample rate " << sample_rate_hz;
-  int16_t ret =
-      WebRtcPcm16b_Decode(encoded, static_cast<int16_t>(encoded_len), decoded);
+  size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
   *speech_type = ConvertSpeechType(1);
-  return ret;
+  return static_cast<int>(ret);
 }
 
 int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,
@@ -132,7 +129,7 @@ int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,
   return static_cast<int>(encoded_len / (2 * Channels()));
 }
 
-AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh(int num_channels)
+AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh(size_t num_channels)
     : channels_(num_channels) {
   DCHECK(num_channels > 0);
 }
@@ -163,14 +160,13 @@ int AudioDecoderIlbc::DecodeInternal(const uint8_t* encoded,
                                      SpeechType* speech_type) {
   DCHECK_EQ(sample_rate_hz, 8000);
   int16_t temp_type = 1;  // Default is speech.
-  int ret = WebRtcIlbcfix_Decode(dec_state_, encoded,
-                                 static_cast<int16_t>(encoded_len), decoded,
+  int ret = WebRtcIlbcfix_Decode(dec_state_, encoded, encoded_len, decoded,
                                  &temp_type);
   *speech_type = ConvertSpeechType(temp_type);
   return ret;
 }
 
-int AudioDecoderIlbc::DecodePlc(int num_frames, int16_t* decoded) {
+size_t AudioDecoderIlbc::DecodePlc(size_t num_frames, int16_t* decoded) {
   return WebRtcIlbcfix_NetEqPlc(dec_state_, decoded, num_frames);
 }
 
@@ -204,11 +200,10 @@ int AudioDecoderG722::DecodeInternal(const uint8_t* encoded,
                                      SpeechType* speech_type) {
   DCHECK_EQ(sample_rate_hz, 16000);
   int16_t temp_type = 1;  // Default is speech.
-  int16_t ret =
-      WebRtcG722_Decode(dec_state_, encoded, static_cast<int16_t>(encoded_len),
-                        decoded, &temp_type);
+  size_t ret =
+      WebRtcG722_Decode(dec_state_, encoded, encoded_len, decoded, &temp_type);
   *speech_type = ConvertSpeechType(temp_type);
-  return ret;
+  return static_cast<int>(ret);
 }
 
 int AudioDecoderG722::Init() {
@@ -246,29 +241,24 @@ int AudioDecoderG722Stereo::DecodeInternal(const uint8_t* encoded,
   uint8_t* encoded_deinterleaved = new uint8_t[encoded_len];
   SplitStereoPacket(encoded, encoded_len, encoded_deinterleaved);
   // Decode left and right.
-  int16_t ret = WebRtcG722_Decode(dec_state_left_, encoded_deinterleaved,
-                                  static_cast<int16_t>(encoded_len / 2),
-                                  decoded, &temp_type);
-  if (ret >= 0) {
-    int decoded_len = ret;
-    ret = WebRtcG722_Decode(dec_state_right_,
-                            &encoded_deinterleaved[encoded_len / 2],
-                            static_cast<int16_t>(encoded_len / 2),
-                            &decoded[decoded_len], &temp_type);
-    if (ret == decoded_len) {
-      ret += decoded_len;  // Return total number of samples.
-      // Interleave output.
-      for (int k = ret / 2; k < ret; k++) {
-        int16_t temp = decoded[k];
-        memmove(&decoded[2 * k - ret + 2], &decoded[2 * k - ret + 1],
-                (ret - k - 1) * sizeof(int16_t));
-        decoded[2 * k - ret + 1] = temp;
-      }
+  size_t decoded_len = WebRtcG722_Decode(dec_state_left_, encoded_deinterleaved,
+                                         encoded_len / 2, decoded, &temp_type);
+  size_t ret = WebRtcG722_Decode(
+      dec_state_right_, &encoded_deinterleaved[encoded_len / 2],
+      encoded_len / 2, &decoded[decoded_len], &temp_type);
+  if (ret == decoded_len) {
+    ret += decoded_len;  // Return total number of samples.
+    // Interleave output.
+    for (size_t k = ret / 2; k < ret; k++) {
+      int16_t temp = decoded[k];
+      memmove(&decoded[2 * k - ret + 2], &decoded[2 * k - ret + 1],
+              (ret - k - 1) * sizeof(int16_t));
+      decoded[2 * k - ret + 1] = temp;
     }
   }
   *speech_type = ConvertSpeechType(temp_type);
   delete [] encoded_deinterleaved;
-  return ret;
+  return static_cast<int>(ret);
 }
 
 size_t AudioDecoderG722Stereo::Channels() const {
@@ -312,7 +302,8 @@ void AudioDecoderG722Stereo::SplitStereoPacket(const uint8_t* encoded,
 
 // Opus
 #ifdef WEBRTC_CODEC_OPUS
-AudioDecoderOpus::AudioDecoderOpus(int num_channels) : channels_(num_channels) {
+AudioDecoderOpus::AudioDecoderOpus(size_t num_channels)
+    : channels_(num_channels) {
   DCHECK(num_channels == 1 || num_channels == 2);
   WebRtcOpus_DecoderCreate(&dec_state_, static_cast<int>(channels_));
 }
@@ -328,8 +319,7 @@ int AudioDecoderOpus::DecodeInternal(const uint8_t* encoded,
                                      SpeechType* speech_type) {
   DCHECK_EQ(sample_rate_hz, 48000);
   int16_t temp_type = 1;  // Default is speech.
-  int ret = WebRtcOpus_Decode(dec_state_, encoded,
-                              static_cast<int16_t>(encoded_len), decoded,
+  int ret = WebRtcOpus_Decode(dec_state_, encoded, encoded_len, decoded,
                               &temp_type);
   if (ret > 0)
     ret *= static_cast<int>(channels_);  // Return total number of samples.
@@ -350,8 +340,7 @@ int AudioDecoderOpus::DecodeRedundantInternal(const uint8_t* encoded,
 
   DCHECK_EQ(sample_rate_hz, 48000);
   int16_t temp_type = 1;  // Default is speech.
-  int ret = WebRtcOpus_DecodeFec(dec_state_, encoded,
-                                 static_cast<int16_t>(encoded_len), decoded,
+  int ret = WebRtcOpus_DecodeFec(dec_state_, encoded, encoded_len, decoded,
                                  &temp_type);
   if (ret > 0)
     ret *= static_cast<int>(channels_);  // Return total number of samples.
@@ -365,8 +354,7 @@ int AudioDecoderOpus::Init() {
 
 int AudioDecoderOpus::PacketDuration(const uint8_t* encoded,
                                      size_t encoded_len) const {
-  return WebRtcOpus_DurationEst(dec_state_,
-                                encoded, static_cast<int>(encoded_len));
+  return WebRtcOpus_DurationEst(dec_state_, encoded, encoded_len);
 }
 
 int AudioDecoderOpus::PacketDurationRedundant(const uint8_t* encoded,
@@ -376,13 +364,13 @@ int AudioDecoderOpus::PacketDurationRedundant(const uint8_t* encoded,
     return PacketDuration(encoded, encoded_len);
   }
 
-  return WebRtcOpus_FecDurationEst(encoded, static_cast<int>(encoded_len));
+  return WebRtcOpus_FecDurationEst(encoded, encoded_len);
 }
 
 bool AudioDecoderOpus::PacketHasFec(const uint8_t* encoded,
                                     size_t encoded_len) const {
   int fec;
-  fec = WebRtcOpus_PacketHasFec(encoded, static_cast<int>(encoded_len));
+  fec = WebRtcOpus_PacketHasFec(encoded, encoded_len);
   return (fec == 1);
 }
@@ -122,7 +122,7 @@ class AudioDecoderPcm16B : public AudioDecoder {
 // of channels is derived from the type.
 class AudioDecoderPcm16BMultiCh : public AudioDecoderPcm16B {
  public:
-  explicit AudioDecoderPcm16BMultiCh(int num_channels);
+  explicit AudioDecoderPcm16BMultiCh(size_t num_channels);
   size_t Channels() const override;
 
  private:
@@ -137,7 +137,7 @@ class AudioDecoderIlbc : public AudioDecoder {
   AudioDecoderIlbc();
   ~AudioDecoderIlbc() override;
   bool HasDecodePlc() const override;
-  int DecodePlc(int num_frames, int16_t* decoded) override;
+  size_t DecodePlc(size_t num_frames, int16_t* decoded) override;
   int Init() override;
   size_t Channels() const override;
 
@@ -209,7 +209,7 @@ class AudioDecoderG722Stereo : public AudioDecoder {
 #ifdef WEBRTC_CODEC_OPUS
 class AudioDecoderOpus : public AudioDecoder {
  public:
-  explicit AudioDecoderOpus(int num_channels);
+  explicit AudioDecoderOpus(size_t num_channels);
   ~AudioDecoderOpus() override;
 
   int Init() override;
@@ -141,7 +141,7 @@ class AudioDecoderTest : public ::testing::Test {
                                        input_len_samples);
     rtc::scoped_ptr<int16_t[]> interleaved_input(
         new int16_t[channels_ * samples_per_10ms]);
-    for (int i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
+    for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
       EXPECT_EQ(0u, encoded_info_.encoded_bytes);
 
       // Duplicate the mono input signal to however many channels the test
@@ -348,7 +348,7 @@ class AudioDecoderIlbcTest : public AudioDecoderTest {
                                            output.get(), &speech_type);
     EXPECT_EQ(frame_size_, dec_len);
     // Simply call DecodePlc and verify that we get 0 as return value.
-    EXPECT_EQ(0, decoder_->DecodePlc(1, output.get()));
+    EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get()));
   }
 };
@@ -21,6 +21,9 @@
 
 namespace webrtc {
 
+// static
+const size_t BackgroundNoise::kMaxLpcOrder;
+
 BackgroundNoise::BackgroundNoise(size_t num_channels)
     : num_channels_(num_channels),
       channel_parameters_(new ChannelParameters[num_channels_]),
@@ -150,7 +153,7 @@ const int16_t* BackgroundNoise::FilterState(size_t channel) const {
 void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
                                      size_t length) {
   assert(channel < num_channels_);
-  length = std::min(length, static_cast<size_t>(kMaxLpcOrder));
+  length = std::min(length, kMaxLpcOrder);
   memcpy(channel_parameters_[channel].filter_state, input,
          length * sizeof(int16_t));
 }
@@ -165,7 +168,7 @@ int16_t BackgroundNoise::ScaleShift(size_t channel) const {
 }
 
 int32_t BackgroundNoise::CalculateAutoCorrelation(
-    const int16_t* signal, int length, int32_t* auto_correlation) const {
+    const int16_t* signal, size_t length, int32_t* auto_correlation) const {
   int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length);
   int correlation_scale = kLogVecLen -
       WebRtcSpl_NormW32(signal_max * signal_max);
@@ -247,7 +250,7 @@ void BackgroundNoise::SaveParameters(size_t channel,
   residual_energy = residual_energy << norm_shift;
 
   // Calculate scale and shift factor.
-  parameters.scale = WebRtcSpl_SqrtFloor(residual_energy);
+  parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy));
   // Add 13 to the |scale_shift_|, since the random numbers table is in
   // Q13.
   // TODO(hlundin): Move the "13" to where the |scale_shift_| is used?
@@ -29,7 +29,7 @@ class BackgroundNoise {
  public:
   // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
   // Will work anyway, but probably sound a little worse.
-  static const int kMaxLpcOrder = 8;  // 32000 / 8000 + 4.
+  static const size_t kMaxLpcOrder = 8;  // 32000 / 8000 + 4.
 
   explicit BackgroundNoise(size_t num_channels);
   virtual ~BackgroundNoise();
@@ -76,9 +76,9 @@ class BackgroundNoise {
 
  private:
   static const int kThresholdIncrement = 229;  // 0.0035 in Q16.
-  static const int kVecLen = 256;
+  static const size_t kVecLen = 256;
   static const int kLogVecLen = 8;  // log2(kVecLen).
-  static const int kResidualLength = 64;
+  static const size_t kResidualLength = 64;
   static const int16_t kLogResidualLength = 6;  // log2(kResidualLength)
 
   struct ChannelParameters {
@@ -112,7 +112,7 @@ class BackgroundNoise {
   };
 
   int32_t CalculateAutoCorrelation(const int16_t* signal,
-                                   int length,
+                                   size_t length,
                                    int32_t* auto_correlation) const;
 
   // Increments the energy threshold by a factor 1 + |kThresholdIncrement|.
@@ -23,16 +23,16 @@ void BufferLevelFilter::Reset() {
   level_factor_ = 253;
 }
 
-void BufferLevelFilter::Update(int buffer_size_packets,
+void BufferLevelFilter::Update(size_t buffer_size_packets,
                                int time_stretched_samples,
-                               int packet_len_samples) {
+                               size_t packet_len_samples) {
   // Filter:
   // |filtered_current_level_| = |level_factor_| * |filtered_current_level_| +
   //                             (1 - |level_factor_|) * |buffer_size_packets|
   // |level_factor_| and |filtered_current_level_| are in Q8.
   // |buffer_size_packets| is in Q0.
   filtered_current_level_ = ((level_factor_ * filtered_current_level_) >> 8) +
-      ((256 - level_factor_) * buffer_size_packets);
+      ((256 - level_factor_) * static_cast<int>(buffer_size_packets));
 
   // Account for time-scale operations (accelerate and pre-emptive expand).
   if (time_stretched_samples && packet_len_samples > 0) {
@@ -42,7 +42,7 @@ void BufferLevelFilter::Update(int buffer_size_packets,
     // Make sure that the filtered value remains non-negative.
     filtered_current_level_ = std::max(0,
         filtered_current_level_ -
-        (time_stretched_samples << 8) / packet_len_samples);
+        (time_stretched_samples << 8) / static_cast<int>(packet_len_samples));
   }
 }
@@ -11,6 +11,8 @@
 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
 
+#include <stddef.h>
+
 #include "webrtc/base/constructormagic.h"
 
 namespace webrtc {
@@ -26,8 +28,8 @@ class BufferLevelFilter {
   // corresponding number of packets, and is subtracted from the filtered
   // value (thus bypassing the filter operation). |packet_len_samples| is the
   // number of audio samples carried in each incoming packet.
-  virtual void Update(int buffer_size_packets, int time_stretched_samples,
-                      int packet_len_samples);
+  virtual void Update(size_t buffer_size_packets, int time_stretched_samples,
+                      size_t packet_len_samples);
 
   // Set the current target buffer level (obtained from
   // DelayManager::base_target_level()). Used to select the appropriate
@@ -79,8 +79,7 @@ int ComfortNoise::Generate(size_t requested_length,
   CNG_dec_inst* cng_inst = cng_decoder->CngDecoderInstance();
   // The expression &(*output)[0][0] is a pointer to the first element in
   // the first channel.
-  if (WebRtcCng_Generate(cng_inst, &(*output)[0][0],
-                         static_cast<int16_t>(number_of_samples),
+  if (WebRtcCng_Generate(cng_inst, &(*output)[0][0], number_of_samples,
                          new_period) < 0) {
     // Error returned.
     output->Zeros(requested_length);
@@ -24,7 +24,7 @@
 namespace webrtc {
 
 DecisionLogic* DecisionLogic::Create(int fs_hz,
-                                     int output_size_samples,
+                                     size_t output_size_samples,
                                      NetEqPlayoutMode playout_mode,
                                      DecoderDatabase* decoder_database,
                                      const PacketBuffer& packet_buffer,
@@ -56,7 +56,7 @@ DecisionLogic* DecisionLogic::Create(int fs_hz,
 }
 
 DecisionLogic::DecisionLogic(int fs_hz,
-                             int output_size_samples,
+                             size_t output_size_samples,
                              NetEqPlayoutMode playout_mode,
                              DecoderDatabase* decoder_database,
                              const PacketBuffer& packet_buffer,
@@ -95,7 +95,7 @@ void DecisionLogic::SoftReset() {
   timescale_hold_off_ = kMinTimescaleInterval;
 }
 
-void DecisionLogic::SetSampleRate(int fs_hz, int output_size_samples) {
+void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
   // TODO(hlundin): Change to an enumerator and skip assert.
   assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
   fs_mult_ = fs_hz / 8000;
@@ -104,7 +104,7 @@ void DecisionLogic::SetSampleRate(int fs_hz, int output_size_samples) {
 
 Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
                                       const Expand& expand,
-                                      int decoder_frame_length,
+                                      size_t decoder_frame_length,
                                       const RTPHeader* packet_header,
                                       Modes prev_mode,
                                       bool play_dtmf, bool* reset_decoder) {
@@ -123,9 +123,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
     }
   }
 
-  const int samples_left = static_cast<int>(
-      sync_buffer.FutureLength() - expand.overlap_length());
-  const int cur_size_samples =
+  const size_t samples_left =
+      sync_buffer.FutureLength() - expand.overlap_length();
+  const size_t cur_size_samples =
       samples_left + packet_buffer_.NumSamplesInBuffer(decoder_database_,
                                                        decoder_frame_length);
   LOG(LS_VERBOSE) << "Buffers: " << packet_buffer_.NumPacketsInBuffer() <<
@@ -153,9 +153,10 @@ void DecisionLogic::ExpandDecision(Operations operation) {
   }
 }
 
-void DecisionLogic::FilterBufferLevel(int buffer_size_samples,
+void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
                                       Modes prev_mode) {
-  const int elapsed_time_ms = output_size_samples_ / (8 * fs_mult_);
+  const int elapsed_time_ms =
+      static_cast<int>(output_size_samples_ / (8 * fs_mult_));
   delay_manager_->UpdateCounters(elapsed_time_ms);
 
   // Do not update buffer history if currently playing CNG since it will bias
@@ -164,7 +165,7 @@ void DecisionLogic::FilterBufferLevel(int buffer_size_samples,
     buffer_level_filter_->SetTargetBufferLevel(
         delay_manager_->base_target_level());
 
-    int buffer_size_packets = 0;
+    size_t buffer_size_packets = 0;
     if (packet_length_samples_ > 0) {
       // Calculate size in packets.
      buffer_size_packets = buffer_size_samples / packet_length_samples_;
@@ -34,7 +34,7 @@ class DecisionLogic {
   // Static factory function which creates different types of objects depending
   // on the |playout_mode|.
   static DecisionLogic* Create(int fs_hz,
-                               int output_size_samples,
+                               size_t output_size_samples,
                                NetEqPlayoutMode playout_mode,
                                DecoderDatabase* decoder_database,
                                const PacketBuffer& packet_buffer,
@@ -43,7 +43,7 @@ class DecisionLogic {
 
   // Constructor.
   DecisionLogic(int fs_hz,
-                int output_size_samples,
+                size_t output_size_samples,
                 NetEqPlayoutMode playout_mode,
                 DecoderDatabase* decoder_database,
                 const PacketBuffer& packet_buffer,
@@ -60,7 +60,7 @@ class DecisionLogic {
   void SoftReset();
 
   // Sets the sample rate and the output block size.
-  void SetSampleRate(int fs_hz, int output_size_samples);
+  void SetSampleRate(int fs_hz, size_t output_size_samples);
 
   // Returns the operation that should be done next. |sync_buffer| and |expand|
   // are provided for reference. |decoder_frame_length| is the number of samples
@@ -75,7 +75,7 @@ class DecisionLogic {
   // return value.
   Operations GetDecision(const SyncBuffer& sync_buffer,
                          const Expand& expand,
-                         int decoder_frame_length,
+                         size_t decoder_frame_length,
                          const RTPHeader* packet_header,
                          Modes prev_mode,
                          bool play_dtmf,
@@ -101,12 +101,12 @@ class DecisionLogic {
 
   // Accessors and mutators.
   void set_sample_memory(int32_t value) { sample_memory_ = value; }
-  int generated_noise_samples() const { return generated_noise_samples_; }
-  void set_generated_noise_samples(int value) {
+  size_t generated_noise_samples() const { return generated_noise_samples_; }
+  void set_generated_noise_samples(size_t value) {
     generated_noise_samples_ = value;
   }
-  int packet_length_samples() const { return packet_length_samples_; }
-  void set_packet_length_samples(int value) {
+  size_t packet_length_samples() const { return packet_length_samples_; }
+  void set_packet_length_samples(size_t value) {
     packet_length_samples_ = value;
   }
   void set_prev_time_scale(bool value) { prev_time_scale_ = value; }
@@ -134,7 +134,7 @@ class DecisionLogic {
   // Should be implemented by derived classes.
   virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                             const Expand& expand,
-                                            int decoder_frame_length,
+                                            size_t decoder_frame_length,
                                             const RTPHeader* packet_header,
                                             Modes prev_mode,
                                             bool play_dtmf,
@@ -142,18 +142,18 @@ class DecisionLogic {
 
   // Updates the |buffer_level_filter_| with the current buffer level
   // |buffer_size_packets|.
-  void FilterBufferLevel(int buffer_size_packets, Modes prev_mode);
+  void FilterBufferLevel(size_t buffer_size_packets, Modes prev_mode);
 
   DecoderDatabase* decoder_database_;
   const PacketBuffer& packet_buffer_;
   DelayManager* delay_manager_;
   BufferLevelFilter* buffer_level_filter_;
   int fs_mult_;
-  int output_size_samples_;
+  size_t output_size_samples_;
   CngState cng_state_;  // Remember if comfort noise is interrupted by other
                         // event (e.g., DTMF).
-  int generated_noise_samples_;
-  int packet_length_samples_;
+  size_t generated_noise_samples_;
+  size_t packet_length_samples_;
   int sample_memory_;
   bool prev_time_scale_;
   int timescale_hold_off_;
@@ -22,7 +22,7 @@ namespace webrtc {
 Operations DecisionLogicFax::GetDecisionSpecialized(
     const SyncBuffer& sync_buffer,
     const Expand& expand,
-    int decoder_frame_length,
+    size_t decoder_frame_length,
     const RTPHeader* packet_header,
     Modes prev_mode,
     bool play_dtmf,
@@ -23,7 +23,7 @@ class DecisionLogicFax : public DecisionLogic {
  public:
   // Constructor.
   DecisionLogicFax(int fs_hz,
-                   int output_size_samples,
+                   size_t output_size_samples,
                    NetEqPlayoutMode playout_mode,
                    DecoderDatabase* decoder_database,
                    const PacketBuffer& packet_buffer,
@@ -46,7 +46,7 @@ class DecisionLogicFax : public DecisionLogic {
   // remain true if it was true before the call).
   Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                     const Expand& expand,
-                                    int decoder_frame_length,
+                                    size_t decoder_frame_length,
                                     const RTPHeader* packet_header,
                                     Modes prev_mode,
                                     bool play_dtmf,
@@ -27,7 +27,7 @@ namespace webrtc {
 Operations DecisionLogicNormal::GetDecisionSpecialized(
     const SyncBuffer& sync_buffer,
     const Expand& expand,
-    int decoder_frame_length,
+    size_t decoder_frame_length,
     const RTPHeader* packet_header,
     Modes prev_mode,
     bool play_dtmf,
@@ -149,7 +149,7 @@ Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode,
 Operations DecisionLogicNormal::FuturePacketAvailable(
     const SyncBuffer& sync_buffer,
     const Expand& expand,
-    int decoder_frame_length,
+    size_t decoder_frame_length,
     Modes prev_mode,
     uint32_t target_timestamp,
     uint32_t available_timestamp,
@@ -172,9 +172,9 @@ Operations DecisionLogicNormal::FuturePacketAvailable(
     }
   }
 
-  const int samples_left = static_cast<int>(sync_buffer.FutureLength() -
-      expand.overlap_length());
-  const int cur_size_samples = samples_left +
+  const size_t samples_left =
+      sync_buffer.FutureLength() - expand.overlap_length();
+  const size_t cur_size_samples = samples_left +
       packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
 
   // If previous was comfort noise, then no merge is needed.
@@ -205,7 +205,8 @@ Operations DecisionLogicNormal::FuturePacketAvailable(
   // fs_mult_ * 8 = fs / 1000.)
   if (prev_mode == kModeExpand ||
       (decoder_frame_length < output_size_samples_ &&
-       cur_size_samples > kAllowMergeWithoutExpandMs * fs_mult_ * 8)) {
+       cur_size_samples >
+           static_cast<size_t>(kAllowMergeWithoutExpandMs * fs_mult_ * 8))) {
     return kMerge;
   } else if (play_dtmf) {
     // Play DTMF instead of expand.
@@ -23,7 +23,7 @@ class DecisionLogicNormal : public DecisionLogic {
  public:
   // Constructor.
   DecisionLogicNormal(int fs_hz,
-                      int output_size_samples,
+                      size_t output_size_samples,
                       NetEqPlayoutMode playout_mode,
                       DecoderDatabase* decoder_database,
                       const PacketBuffer& packet_buffer,
@@ -50,7 +50,7 @@ class DecisionLogicNormal : public DecisionLogic {
   // remain true if it was true before the call).
   Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                     const Expand& expand,
-                                    int decoder_frame_length,
+                                    size_t decoder_frame_length,
                                     const RTPHeader* packet_header,
                                     Modes prev_mode,
                                     bool play_dtmf,
@@ -61,7 +61,7 @@ class DecisionLogicNormal : public DecisionLogic {
   virtual Operations FuturePacketAvailable(
       const SyncBuffer& sync_buffer,
       const Expand& expand,
-      int decoder_frame_length,
+      size_t decoder_frame_length,
       Modes prev_mode,
       uint32_t target_timestamp,
      uint32_t available_timestamp,
@@ -22,7 +22,7 @@
 
 namespace webrtc {
 
-DelayManager::DelayManager(int max_packets_in_buffer,
+DelayManager::DelayManager(size_t max_packets_in_buffer,
                            DelayPeakDetector* peak_detector)
     : first_packet_received_(false),
       max_packets_in_buffer_(max_packets_in_buffer),
@@ -239,7 +239,8 @@ void DelayManager::LimitTargetLevel() {
   }
 
   // Shift to Q8, then 75%.;
-  int max_buffer_packets_q8 = (3 * (max_packets_in_buffer_ << 8)) / 4;
+  int max_buffer_packets_q8 =
+      static_cast<int>((3 * (max_packets_in_buffer_ << 8)) / 4);
   target_level_ = std::min(target_level_, max_buffer_packets_q8);
 
   // Sanity check, at least 1 packet (in Q8).
@@ -389,7 +390,8 @@ bool DelayManager::SetMinimumDelay(int delay_ms) {
   // |max_packets_in_buffer_|.
   if ((maximum_delay_ms_ > 0 && delay_ms > maximum_delay_ms_) ||
       (packet_len_ms_ > 0 &&
-       delay_ms > 3 * max_packets_in_buffer_ * packet_len_ms_ / 4)) {
+       delay_ms >
+           static_cast<int>(3 * max_packets_in_buffer_ * packet_len_ms_ / 4))) {
     return false;
   }
   minimum_delay_ms_ = delay_ms;
@@ -32,7 +32,7 @@ class DelayManager {
   // buffer can hold no more than |max_packets_in_buffer| packets (i.e., this
   // is the number of packet slots in the buffer). Supply a PeakDetector
   // object to the DelayManager.
-  DelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector);
+  DelayManager(size_t max_packets_in_buffer, DelayPeakDetector* peak_detector);
 
   virtual ~DelayManager();
 
@@ -132,7 +132,7 @@ class DelayManager {
   void LimitTargetLevel();
 
   bool first_packet_received_;
-  const int max_packets_in_buffer_;  // Capacity of the packet buffer.
+  const size_t max_packets_in_buffer_;  // Capacity of the packet buffer.
   IATVector iat_vector_;  // Histogram of inter-arrival times.
   int iat_factor_;  // Forgetting factor for updating the IAT histogram (Q15).
   int packet_iat_count_ms_;  // Milliseconds elapsed since last packet.
@@ -99,13 +99,13 @@ int DspHelper::RampSignal(AudioMultiVector* signal,
   return end_factor;
 }
 
-void DspHelper::PeakDetection(int16_t* data, int data_length,
-                              int num_peaks, int fs_mult,
-                              int* peak_index, int16_t* peak_value) {
-  int16_t min_index = 0;
-  int16_t max_index = 0;
+void DspHelper::PeakDetection(int16_t* data, size_t data_length,
+                              size_t num_peaks, int fs_mult,
+                              size_t* peak_index, int16_t* peak_value) {
+  size_t min_index = 0;
+  size_t max_index = 0;
 
-  for (int i = 0; i <= num_peaks - 1; i++) {
+  for (size_t i = 0; i <= num_peaks - 1; i++) {
     if (num_peaks == 1) {
       // Single peak. The parabola fit assumes that an extra point is
       // available; worst case it gets a zero on the high end of the signal.
@@ -148,7 +148,7 @@ void DspHelper::PeakDetection(int16_t* data, int data_length,
 }
 
 void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult,
-                             int* peak_index, int16_t* peak_value) {
+                             size_t* peak_index, int16_t* peak_value) {
   uint16_t fit_index[13];
   if (fs_mult == 1) {
     fit_index[0] = 0;
@@ -235,16 +235,16 @@ void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult,
   }
 }
 
-int DspHelper::MinDistortion(const int16_t* signal, int min_lag,
-                             int max_lag, int length,
-                             int32_t* distortion_value) {
-  int best_index = 0;
+size_t DspHelper::MinDistortion(const int16_t* signal, size_t min_lag,
+                                size_t max_lag, size_t length,
+                                int32_t* distortion_value) {
+  size_t best_index = 0;
   int32_t min_distortion = WEBRTC_SPL_WORD32_MAX;
-  for (int i = min_lag; i <= max_lag; i++) {
+  for (size_t i = min_lag; i <= max_lag; i++) {
     int32_t sum_diff = 0;
     const int16_t* data1 = signal;
     const int16_t* data2 = signal - i;
-    for (int j = 0; j < length; j++) {
+    for (size_t j = 0; j < length; j++) {
       sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]);
     }
     // Compare with previous minimum.
@@ -293,15 +293,15 @@ void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) {
 }
 
 int DspHelper::DownsampleTo4kHz(const int16_t* input, size_t input_length,
-                                int output_length, int input_rate_hz,
+                                size_t output_length, int input_rate_hz,
                                 bool compensate_delay, int16_t* output) {
   // Set filter parameters depending on input frequency.
   // NOTE: The phase delay values are wrong compared to the true phase delay
   // of the filters. However, the error is preserved (through the +1 term) for
   // consistency.
   const int16_t* filter_coefficients;  // Filter coefficients.
-  int16_t filter_length;  // Number of coefficients.
-  int16_t filter_delay;  // Phase delay in samples.
+  size_t filter_length;  // Number of coefficients.
+  size_t filter_delay;  // Phase delay in samples.
   int16_t factor;  // Conversion rate (inFsHz / 8000).
   switch (input_rate_hz) {
     case 8000: {
@@ -345,9 +345,8 @@ int DspHelper::DownsampleTo4kHz(const int16_t* input, size_t input_length,
 
   // Returns -1 if input signal is too short; 0 otherwise.
   return WebRtcSpl_DownsampleFast(
-      &input[filter_length - 1], static_cast<int>(input_length) -
-      (filter_length - 1), output, output_length, filter_coefficients,
-      filter_length, factor, filter_delay);
+      &input[filter_length - 1], input_length - filter_length + 1, output,
+      output_length, filter_coefficients, filter_length, factor, filter_delay);
 }
 
 }  // namespace webrtc
@@ -78,9 +78,9 @@ class DspHelper {
   // locations and values are written to the arrays |peak_index| and
   // |peak_value|, respectively. Both arrays must hold at least |num_peaks|
   // elements.
-  static void PeakDetection(int16_t* data, int data_length,
-                            int num_peaks, int fs_mult,
-                            int* peak_index, int16_t* peak_value);
+  static void PeakDetection(int16_t* data, size_t data_length,
+                            size_t num_peaks, int fs_mult,
+                            size_t* peak_index, int16_t* peak_value);
 
   // Estimates the height and location of a maximum. The three values in the
   // array |signal_points| are used as basis for a parabolic fit, which is then
@@ -89,14 +89,15 @@ class DspHelper {
   // |peak_index| and |peak_value| is given in the full sample rate, as
   // indicated by the sample rate multiplier |fs_mult|.
   static void ParabolicFit(int16_t* signal_points, int fs_mult,
-                           int* peak_index, int16_t* peak_value);
+                           size_t* peak_index, int16_t* peak_value);
 
   // Calculates the sum-abs-diff for |signal| when compared to a displaced
   // version of itself. Returns the displacement lag that results in the minimum
   // distortion. The resulting distortion is written to |distortion_value|.
   // The values of |min_lag| and |max_lag| are boundaries for the search.
-  static int MinDistortion(const int16_t* signal, int min_lag,
-                           int max_lag, int length, int32_t* distortion_value);
+  static size_t MinDistortion(const int16_t* signal, size_t min_lag,
+                              size_t max_lag, size_t length,
+                              int32_t* distortion_value);
 
   // Mixes |length| samples from |input1| and |input2| together and writes the
   // result to |output|. The gain for |input1| starts at |mix_factor| (Q14) and
@@ -122,7 +123,7 @@ class DspHelper {
   // filters if |compensate_delay| is true. Returns -1 if the input is too short
   // to produce |output_length| samples, otherwise 0.
   static int DownsampleTo4kHz(const int16_t* input, size_t input_length,
-                              int output_length, int input_rate_hz,
+                              size_t output_length, int input_rate_hz,
                               bool compensate_delay, int16_t* output);
 
  private:
@@ -149,18 +149,18 @@ void DtmfToneGenerator::Reset() {
 }
 
 // Generate num_samples of DTMF signal and write to |output|.
-int DtmfToneGenerator::Generate(int num_samples,
+int DtmfToneGenerator::Generate(size_t num_samples,
                                 AudioMultiVector* output) {
   if (!initialized_) {
     return kNotInitialized;
   }
 
-  if (num_samples < 0 || !output) {
+  if (!output) {
     return kParameterError;
   }
 
   output->AssertSize(num_samples);
-  for (int i = 0; i < num_samples; ++i) {
+  for (size_t i = 0; i < num_samples; ++i) {
     // Use recursion formula y[n] = a * y[n - 1] - y[n - 2].
     int16_t temp_val_low = ((coeff1_ * sample_history1_[1] + 8192) >> 14)
         - sample_history1_[0];
@@ -186,7 +186,7 @@ int DtmfToneGenerator::Generate(int num_samples,
     output->CopyChannel(0, channel);
   }
 
-  return num_samples;
+  return static_cast<int>(num_samples);
 }
 
 bool DtmfToneGenerator::initialized() const {
@@ -30,7 +30,7 @@ class DtmfToneGenerator {
   virtual ~DtmfToneGenerator() {}
   virtual int Init(int fs, int event, int attenuation);
   virtual void Reset();
-  virtual int Generate(int num_samples, AudioMultiVector* output);
+  virtual int Generate(size_t num_samples, AudioMultiVector* output);
   virtual bool initialized() const;
 
  private:
@@ -171,8 +171,6 @@ TEST(DtmfToneGenerator, TestErrors) {
   // Initialize with valid parameters.
   ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation));
   EXPECT_TRUE(tone_gen.initialized());
-  // Negative number of samples.
-  EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Generate(-1, &signal));
   // NULL pointer to destination.
   EXPECT_EQ(DtmfToneGenerator::kParameterError,
             tone_gen.Generate(kNumSamples, NULL));
@@ -47,7 +47,7 @@ Expand::Expand(BackgroundNoise* background_noise,
       expand_duration_samples_(0),
       channel_parameters_(new ChannelParameters[num_channels_]) {
   assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
-  assert(fs <= kMaxSampleRate);  // Should not be possible.
+  assert(fs <= static_cast<int>(kMaxSampleRate));  // Should not be possible.
   assert(num_channels_ > 0);
   memset(expand_lags_, 0, sizeof(expand_lags_));
   Reset();
@@ -72,7 +72,7 @@ int Expand::Process(AudioMultiVector* output) {
   int16_t temp_data[kTempDataSize];  // TODO(hlundin) Remove this.
   int16_t* voiced_vector_storage = temp_data;
   int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
-  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
   int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
   int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
   int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
@@ -87,7 +87,7 @@ int Expand::Process(AudioMultiVector* output) {
   } else {
     // This is not the first expansion, parameters are already estimated.
     // Extract a noise segment.
-    int16_t rand_length = max_lag_;
+    size_t rand_length = max_lag_;
     // This only applies to SWB where length could be larger than 256.
     assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
     GenerateRandomVector(2, rand_length, random_vector);
@@ -119,7 +119,7 @@ int Expand::Process(AudioMultiVector* output) {
       WebRtcSpl_ScaleAndAddVectorsWithRound(
          &parameters.expand_vector0[expansion_vector_position], 3,
          &parameters.expand_vector1[expansion_vector_position], 1, 2,
-          voiced_vector_storage, static_cast<int>(temp_length));
+          voiced_vector_storage, temp_length);
     } else if (current_lag_index_ == 2) {
       // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
       assert(expansion_vector_position + temp_length <=
@@ -129,7 +129,7 @@ int Expand::Process(AudioMultiVector* output) {
       WebRtcSpl_ScaleAndAddVectorsWithRound(
          &parameters.expand_vector0[expansion_vector_position], 1,
          &parameters.expand_vector1[expansion_vector_position], 1, 1,
-          voiced_vector_storage, static_cast<int>(temp_length));
+          voiced_vector_storage, temp_length);
     }
 
     // Get tapering window parameters. Values are in Q15.
@@ -196,10 +196,10 @@ int Expand::Process(AudioMultiVector* output) {
     WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
                                     parameters.ar_gain, add_constant,
                                     parameters.ar_gain_scale,
-                                    static_cast<int>(current_lag));
+                                    current_lag);
     WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
                               parameters.ar_filter, kUnvoicedLpcOrder + 1,
-                              static_cast<int>(current_lag));
+                              current_lag);
     memcpy(parameters.ar_filter_state,
            &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
            sizeof(int16_t) * kUnvoicedLpcOrder);
@@ -212,7 +212,8 @@ int Expand::Process(AudioMultiVector* output) {
     //   (>= 31 .. <= 63) * fs_mult  => go from 1 to 0 in about 16 ms;
     //   >= 64 * fs_mult             => go from 1 to 0 in about 32 ms.
     // temp_shift = getbits(max_lag_) - 5.
-    int temp_shift = (31 - WebRtcSpl_NormW32(max_lag_)) - 5;
+    int temp_shift =
+        (31 - WebRtcSpl_NormW32(rtc::checked_cast<int32_t>(max_lag_))) - 5;
     int16_t mix_factor_increment = 256 >> temp_shift;
     if (stop_muting_) {
       mix_factor_increment = 0;
@@ -237,7 +238,7 @@ int Expand::Process(AudioMultiVector* output) {
     WebRtcSpl_ScaleAndAddVectorsWithRound(
         voiced_vector + temp_length, parameters.current_voice_mix_factor,
         unvoiced_vector + temp_length, temp_scale, 14,
-        temp_data + temp_length, static_cast<int>(current_lag - temp_length));
+        temp_data + temp_length, current_lag - temp_length);
   }
 
   // Select muting slope depending on how many consecutive expands we have
@@ -258,7 +259,7 @@ int Expand::Process(AudioMultiVector* output) {
     // Mute to the previous level, then continue with the muting.
     WebRtcSpl_AffineTransformVector(temp_data, temp_data,
                                     parameters.mute_factor, 8192,
-                                    14, static_cast<int>(current_lag));
+                                    14, current_lag);
 
     if (!stop_muting_) {
       DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);
@@ -351,26 +352,26 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
   int32_t auto_correlation[kUnvoicedLpcOrder + 1];
   int16_t reflection_coeff[kUnvoicedLpcOrder];
   int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
-  int best_correlation_index[kNumCorrelationCandidates];
+  size_t best_correlation_index[kNumCorrelationCandidates];
   int16_t best_correlation[kNumCorrelationCandidates];
-  int16_t best_distortion_index[kNumCorrelationCandidates];
+  size_t best_distortion_index[kNumCorrelationCandidates];
   int16_t best_distortion[kNumCorrelationCandidates];
   int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
   int32_t best_distortion_w32[kNumCorrelationCandidates];
-  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
   int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
   int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
 
   int fs_mult = fs_hz_ / 8000;
 
   // Pre-calculate common multiplications with fs_mult.
-  int fs_mult_4 = fs_mult * 4;
-  int fs_mult_20 = fs_mult * 20;
-  int fs_mult_120 = fs_mult * 120;
-  int fs_mult_dist_len = fs_mult * kDistortionLength;
-  int fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;
+  size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4);
+  size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20);
+  size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120);
+  size_t fs_mult_dist_len = fs_mult * kDistortionLength;
+  size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;
 
-  const size_t signal_length = 256 * fs_mult;
+  const size_t signal_length = static_cast<size_t>(256 * fs_mult);
   const int16_t* audio_history =
       &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];
 
@@ -379,7 +380,7 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
 
   // Calculate correlation in downsampled domain (4 kHz sample rate).
   int correlation_scale;
-  int correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.
+  size_t correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.
   // If it is decided to break bit-exactness |correlation_length| should be
   // initialized to the return value of Correlation().
   Correlation(audio_history, signal_length, correlation_vector,
@@ -398,11 +399,11 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
 
   // Calculate distortion around the |kNumCorrelationCandidates| best lags.
   int distortion_scale = 0;
-  for (int i = 0; i < kNumCorrelationCandidates; i++) {
-    int16_t min_index = std::max(fs_mult_20,
-                                 best_correlation_index[i] - fs_mult_4);
-    int16_t max_index = std::min(fs_mult_120 - 1,
-                                 best_correlation_index[i] + fs_mult_4);
+  for (size_t i = 0; i < kNumCorrelationCandidates; i++) {
+    size_t min_index = std::max(fs_mult_20,
+                                best_correlation_index[i] - fs_mult_4);
+    size_t max_index = std::min(fs_mult_120 - 1,
+                                best_correlation_index[i] + fs_mult_4);
     best_distortion_index[i] = DspHelper::MinDistortion(
         &(audio_history[signal_length - fs_mult_dist_len]), min_index,
         max_index, fs_mult_dist_len, &best_distortion_w32[i]);
@@ -416,8 +417,8 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
   // Find the maximizing index |i| of the cost function
   // f[i] = best_correlation[i] / best_distortion[i].
   int32_t best_ratio = std::numeric_limits<int32_t>::min();
-  int best_index = std::numeric_limits<int>::max();
-  for (int i = 0; i < kNumCorrelationCandidates; ++i) {
+  size_t best_index = std::numeric_limits<size_t>::max();
+  for (size_t i = 0; i < kNumCorrelationCandidates; ++i) {
     int32_t ratio;
     if (best_distortion[i] > 0) {
       ratio = (best_correlation[i] << 16) / best_distortion[i];
@@ -432,19 +433,20 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
     }
   }
 
-  int distortion_lag = best_distortion_index[best_index];
-  int correlation_lag = best_correlation_index[best_index];
+  size_t distortion_lag = best_distortion_index[best_index];
+  size_t correlation_lag = best_correlation_index[best_index];
   max_lag_ = std::max(distortion_lag, correlation_lag);
 
   // Calculate the exact best correlation in the range between
   // |correlation_lag| and |distortion_lag|.
   correlation_length =
-      std::max(std::min(distortion_lag + 10, fs_mult_120), 60 * fs_mult);
+      std::max(std::min(distortion_lag + 10, fs_mult_120),
               static_cast<size_t>(60 * fs_mult));
 
-  int start_index = std::min(distortion_lag, correlation_lag);
-  int correlation_lags =
-      WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1;
-  assert(correlation_lags <= 99 * fs_mult + 1);  // Cannot be larger.
+  size_t start_index = std::min(distortion_lag, correlation_lag);
+  size_t correlation_lags = static_cast<size_t>(
+      WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);
+  assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));
 
   for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
     ChannelParameters& parameters = channel_parameters_[channel_ix];
@@ -454,7 +456,7 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
                                           - correlation_lags],
                                       correlation_length + start_index + correlation_lags - 1);
     correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
-        (31 - WebRtcSpl_NormW32(correlation_length)) - 31;
+        (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
     correlation_scale = std::max(0, correlation_scale);
 
     // Calculate the correlation, store in |correlation_vector2|.
@@ -465,7 +467,8 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
         correlation_length, correlation_lags, correlation_scale, -1);
 
     // Find maximizing index.
-    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
+    best_index = static_cast<size_t>(
+        WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags));
     int32_t max_correlation = correlation_vector2[best_index];
     // Compensate index with start offset.
     best_index = best_index + start_index;
@@ -508,7 +511,7 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
 
     // Extract the two vectors expand_vector0 and expand_vector1 from
    // |audio_history|.
-    int16_t expansion_length = static_cast<int16_t>(max_lag_ + overlap_length_);
+    size_t expansion_length = max_lag_ + overlap_length_;
     const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
     const int16_t* vector2 = vector1 - distortion_lag;
     // Normalize the second vector to the same energy as the first.
@@ -527,15 +530,15 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
     // Calculate scaled_energy1 / scaled_energy2 in Q13.
    int32_t energy_ratio = WebRtcSpl_DivW32W16(
        WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
-        energy2 >> scaled_energy2);
+        static_cast<int16_t>(energy2 >> scaled_energy2));
     // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
-    amplitude_ratio = WebRtcSpl_SqrtFloor(energy_ratio << 13);
+    amplitude_ratio =
+        static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13));
     // Copy the two vectors and give them the same energy.
     parameters.expand_vector0.Clear();
     parameters.expand_vector0.PushBack(vector1, expansion_length);
     parameters.expand_vector1.Clear();
-    if (parameters.expand_vector1.Size() <
-        static_cast<size_t>(expansion_length)) {
+    if (parameters.expand_vector1.Size() < expansion_length) {
       parameters.expand_vector1.Extend(
          expansion_length - parameters.expand_vector1.Size());
    }
@@ -626,7 +629,7 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
 
     if (channel_ix == 0) {
       // Extract a noise segment.
-      int16_t noise_length;
+      size_t noise_length;
      if (distortion_lag < 40) {
        noise_length = 2 * distortion_lag + 30;
      } else {
@@ -768,7 +771,7 @@ void Expand::Correlation(const int16_t* input,
                          int* output_scale) const {
   // Set parameters depending on sample rate.
   const int16_t* filter_coefficients;
-  int16_t num_coefficients;
+  size_t num_coefficients;
   int16_t downsampling_factor;
   if (fs_hz_ == 8000) {
     num_coefficients = 3;
@@ -790,14 +793,14 @@ void Expand::Correlation(const int16_t* input,
 
   // Correlate from lag 10 to lag 60 in downsampled domain.
   // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
-  static const int kCorrelationStartLag = 10;
-  static const int kNumCorrelationLags = 54;
-  static const int kCorrelationLength = 60;
+  static const size_t kCorrelationStartLag = 10;
+  static const size_t kNumCorrelationLags = 54;
+  static const size_t kCorrelationLength = 60;
   // Downsample to 4 kHz sample rate.
-  static const int kDownsampledLength = kCorrelationStartLag
+  static const size_t kDownsampledLength = kCorrelationStartLag
       + kNumCorrelationLags + kCorrelationLength;
   int16_t downsampled_input[kDownsampledLength];
-  static const int kFilterDelay = 0;
+  static const size_t kFilterDelay = 0;
   WebRtcSpl_DownsampleFast(
       input + input_length - kDownsampledLength * downsampling_factor,
       kDownsampledLength * downsampling_factor, downsampled_input,
@@ -859,9 +862,9 @@ void Expand::GenerateBackgroundNoise(int16_t* random_vector,
                                      bool too_many_expands,
                                      size_t num_noise_samples,
                                      int16_t* buffer) {
-  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
   int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
-  assert(num_noise_samples <= static_cast<size_t>(kMaxSampleRate / 8000 * 125));
+  assert(num_noise_samples <= (kMaxSampleRate / 8000 * 125));
   int16_t* noise_samples = &buffer[kNoiseLpcOrder];
   if (background_noise_->initialized()) {
     // Use background noise parameters.
@@ -879,12 +882,12 @@ void Expand::GenerateBackgroundNoise(int16_t* random_vector,
         scaled_random_vector, random_vector,
        background_noise_->Scale(channel), dc_offset,
        background_noise_->ScaleShift(channel),
-        static_cast<int>(num_noise_samples));
+        num_noise_samples);
 
     WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
                              background_noise_->Filter(channel),
                              kNoiseLpcOrder + 1,
-                              static_cast<int>(num_noise_samples));
+                              num_noise_samples);
 
     background_noise_->SetFilterState(
        channel,
@@ -931,7 +934,7 @@ void Expand::GenerateBackgroundNoise(int16_t* random_vector,
       // kBgnFade has reached 0.
      WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
                                      bgn_mute_factor, 8192, 14,
-                                      static_cast<int>(num_noise_samples));
+                                      num_noise_samples);
     }
   }
   // Update mute_factor in BackgroundNoise class.
@@ -64,7 +64,7 @@ class Expand {
 
   // Accessors and mutators.
   virtual size_t overlap_length() const;
-  int16_t max_lag() const { return max_lag_; }
+  size_t max_lag() const { return max_lag_; }
 
  protected:
   static const int kMaxConsecutiveExpands = 200;
@@ -96,11 +96,11 @@ class Expand {
   int consecutive_expands_;
 
  private:
-  static const int kUnvoicedLpcOrder = 6;
-  static const int kNumCorrelationCandidates = 3;
-  static const int kDistortionLength = 20;
-  static const int kLpcAnalysisLength = 160;
-  static const int kMaxSampleRate = 48000;
+  static const size_t kUnvoicedLpcOrder = 6;
+  static const size_t kNumCorrelationCandidates = 3;
+  static const size_t kDistortionLength = 20;
+  static const size_t kLpcAnalysisLength = 160;
+  static const size_t kMaxSampleRate = 48000;
   static const int kNumLags = 3;
 
   struct ChannelParameters {
@@ -132,7 +132,7 @@ class Expand {
   BackgroundNoise* const background_noise_;
   StatisticsCalculator* const statistics_;
   const size_t overlap_length_;
-  int16_t max_lag_;
+  size_t max_lag_;
   size_t expand_lags_[kNumLags];
   int lag_index_direction_;
   int current_lag_index_;
@ -45,7 +45,7 @@ struct NetEqNetworkStatistics {
|
||||
// decoding (in Q14).
|
||||
int32_t clockdrift_ppm; // Average clock-drift in parts-per-million
|
||||
// (positive or negative).
|
||||
int added_zero_samples; // Number of zero samples added in "off" mode.
|
||||
size_t added_zero_samples; // Number of zero samples added in "off" mode.
|
||||
};
|
||||
|
||||
enum NetEqOutputType {
|
||||
@ -87,7 +87,7 @@ class NetEq {
|
||||
|
||||
int sample_rate_hz; // Initial value. Will change with input data.
|
||||
bool enable_audio_classifier;
|
||||
int max_packets_in_buffer;
|
||||
size_t max_packets_in_buffer;
|
||||
int max_delay_ms;
|
||||
BackgroundNoiseMode background_noise_mode;
|
||||
NetEqPlayoutMode playout_mode;
|
||||
@ -165,7 +165,7 @@ class NetEq {
|
||||
// The speech type is written to |type|, if |type| is not NULL.
|
||||
// Returns kOK on success, or kFail in case of an error.
|
||||
virtual int GetAudio(size_t max_length, int16_t* output_audio,
|
||||
int* samples_per_channel, int* num_channels,
|
||||
size_t* samples_per_channel, int* num_channels,
|
||||
NetEqOutputType* type) = 0;
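A hedged caller-side sketch of the updated GetAudio contract; everything other than the signature itself (instance name, buffer size) is illustrative.

    int16_t output_audio[480];       // Room for 10 ms at 48 kHz, mono.
    size_t samples_per_channel = 0;  // Out-parameter is now size_t.
    int num_channels = 0;
    NetEqOutputType type;
    if (neteq->GetAudio(480, output_audio, &samples_per_channel,
                        &num_channels, &type) == NetEq::kOK) {
      // samples_per_channel can now be compared against array and
      // container sizes without casts.
    }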
// Associates |rtp_payload_type| with |codec| and stores the information in

@ -31,25 +31,25 @@ Merge::Merge(int fs_hz,
: fs_hz_(fs_hz),
num_channels_(num_channels),
fs_mult_(fs_hz_ / 8000),
timestamps_per_call_(fs_hz_ / 100),
timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),
expand_(expand),
sync_buffer_(sync_buffer),
expanded_(num_channels_) {
assert(num_channels_ > 0);
}

int Merge::Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output) {
size_t Merge::Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output) {
// TODO(hlundin): Change to an enumerator and skip assert.
assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
fs_hz_ == 48000);
assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.

int old_length;
int expand_period;
size_t old_length;
size_t expand_period;
// Get expansion data to overlap and mix with.
int expanded_length = GetExpandedSignal(&old_length, &expand_period);
size_t expanded_length = GetExpandedSignal(&old_length, &expand_period);

// Transfer input signal to an AudioMultiVector.
AudioMultiVector input_vector(num_channels_);
@ -57,7 +57,7 @@ int Merge::Process(int16_t* input, size_t input_length,
size_t input_length_per_channel = input_vector.Size();
assert(input_length_per_channel == input_length / num_channels_);

int16_t best_correlation_index = 0;
size_t best_correlation_index = 0;
size_t output_length = 0;

for (size_t channel = 0; channel < num_channels_; ++channel) {
@ -65,8 +65,8 @@ int Merge::Process(int16_t* input, size_t input_length,
int16_t* expanded_channel = &expanded_[channel][0];
int16_t expanded_max, input_max;
int16_t new_mute_factor = SignalScaling(
input_channel, static_cast<int>(input_length_per_channel),
expanded_channel, &expanded_max, &input_max);
input_channel, input_length_per_channel, expanded_channel,
&expanded_max, &input_max);

// Adjust muting factor (product of "main" muting factor and expand muting
// factor).
@ -84,13 +84,13 @@ int Merge::Process(int16_t* input, size_t input_length,
// Downsample, correlate, and find strongest correlation period for the
// master (i.e., first) channel only.
// Downsample to 4kHz sample rate.
Downsample(input_channel, static_cast<int>(input_length_per_channel),
expanded_channel, expanded_length);
Downsample(input_channel, input_length_per_channel, expanded_channel,
expanded_length);

// Calculate the lag of the strongest correlation period.
best_correlation_index = CorrelateAndPeakSearch(
expanded_max, input_max, old_length,
static_cast<int>(input_length_per_channel), expand_period);
input_length_per_channel, expand_period);
}

static const int kTempDataSize = 3600;
@ -99,11 +99,11 @@ int Merge::Process(int16_t* input, size_t input_length,

// Mute the new decoded data if needed (and unmute it linearly).
// This is the overlapping part of expanded_signal.
int interpolation_length = std::min(
size_t interpolation_length = std::min(
kMaxCorrelationLength * fs_mult_,
expanded_length - best_correlation_index);
interpolation_length = std::min(interpolation_length,
static_cast<int>(input_length_per_channel));
input_length_per_channel);
if (*external_mute_factor < 16384) {
// Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
// and so on.
@ -153,14 +153,14 @@ int Merge::Process(int16_t* input, size_t input_length,

// Return new added length. |old_length| samples were borrowed from
// |sync_buffer_|.
return static_cast<int>(output_length) - old_length;
return output_length - old_length;
}
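Note (not from the patch): once both operands are size_t, output_length - old_length would wrap to a huge value if Process ever produced fewer samples than it borrowed, so the conversion quietly relies on that never happening. A defensive sketch:

    assert(output_length >= old_length);  // Guard the unsigned subtraction.
    return output_length - old_length;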
int Merge::GetExpandedSignal(int* old_length, int* expand_period) {
size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) {
// Check how much data that is left since earlier.
*old_length = static_cast<int>(sync_buffer_->FutureLength());
*old_length = sync_buffer_->FutureLength();
// Should never be less than overlap_length.
assert(*old_length >= static_cast<int>(expand_->overlap_length()));
assert(*old_length >= expand_->overlap_length());
// Generate data to merge the overlap with using expand.
expand_->SetParametersForMergeAfterExpand();

@ -171,7 +171,7 @@ int Merge::GetExpandedSignal(int* old_length, int* expand_period) {
// but shift them towards the end of the buffer. This is ok, since all of
// the buffer will be expand data anyway, so as long as the beginning is
// left untouched, we're fine.
int16_t length_diff = *old_length - 210 * kMaxSampleRate / 8000;
size_t length_diff = *old_length - 210 * kMaxSampleRate / 8000;
sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index());
*old_length = 210 * kMaxSampleRate / 8000;
// This is the truncated length.
@ -181,34 +181,34 @@ int Merge::GetExpandedSignal(int* old_length, int* expand_period) {

AudioMultiVector expanded_temp(num_channels_);
expand_->Process(&expanded_temp);
*expand_period = static_cast<int>(expanded_temp.Size()); // Samples per
// channel.
*expand_period = expanded_temp.Size(); // Samples per channel.

expanded_.Clear();
// Copy what is left since earlier into the expanded vector.
expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index());
assert(expanded_.Size() == static_cast<size_t>(*old_length));
assert(expanded_.Size() == *old_length);
assert(expanded_temp.Size() > 0);
// Do "ugly" copy and paste from the expanded in order to generate more data
// to correlate (but not interpolate) with.
const int required_length = (120 + 80 + 2) * fs_mult_;
if (expanded_.Size() < static_cast<size_t>(required_length)) {
while (expanded_.Size() < static_cast<size_t>(required_length)) {
const size_t required_length = static_cast<size_t>((120 + 80 + 2) * fs_mult_);
if (expanded_.Size() < required_length) {
while (expanded_.Size() < required_length) {
// Append one more pitch period each time.
expanded_.PushBack(expanded_temp);
}
// Trim the length to exactly |required_length|.
expanded_.PopBack(expanded_.Size() - required_length);
}
assert(expanded_.Size() >= static_cast<size_t>(required_length));
assert(expanded_.Size() >= required_length);
return required_length;
}
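Sketch (with hypothetical values) of why the unsigned length_diff subtraction above is safe only inside its guarding branch:

    size_t old_length = sync_buffer->FutureLength();  // Assumed accessor.
    const size_t kLimit = 210 * 48000 / 8000;  // 210 * kMaxSampleRate / 8000.
    if (old_length > kLimit) {
      // The branch condition guarantees old_length > kLimit, so this
      // size_t subtraction cannot wrap around.
      size_t length_diff = old_length - kLimit;
      // ... insert length_diff zeros, then truncate old_length to kLimit.
    }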
int16_t Merge::SignalScaling(const int16_t* input, int input_length,
int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
const int16_t* expanded_signal,
int16_t* expanded_max, int16_t* input_max) const {
// Adjust muting factor if new vector is more or less of the BGN energy.
const int mod_input_length = std::min(64 * fs_mult_, input_length);
const size_t mod_input_length =
std::min(static_cast<size_t>(64 * fs_mult_), input_length);
*expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
*input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);

@ -260,13 +260,13 @@ int16_t Merge::SignalScaling(const int16_t* input, int input_length,

// TODO(hlundin): There are some parameter values in this method that seem
// strange. Compare with Expand::Correlation.
void Merge::Downsample(const int16_t* input, int input_length,
const int16_t* expanded_signal, int expanded_length) {
void Merge::Downsample(const int16_t* input, size_t input_length,
const int16_t* expanded_signal, size_t expanded_length) {
const int16_t* filter_coefficients;
int num_coefficients;
size_t num_coefficients;
int decimation_factor = fs_hz_ / 4000;
static const int kCompensateDelay = 0;
int length_limit = fs_hz_ / 100; // 10 ms in samples.
static const size_t kCompensateDelay = 0;
size_t length_limit = static_cast<size_t>(fs_hz_ / 100); // 10 ms in samples.
if (fs_hz_ == 8000) {
filter_coefficients = DspHelper::kDownsample8kHzTbl;
num_coefficients = 3;
@ -280,7 +280,7 @@ void Merge::Downsample(const int16_t* input, int input_length,
filter_coefficients = DspHelper::kDownsample48kHzTbl;
num_coefficients = 7;
}
int signal_offset = num_coefficients - 1;
size_t signal_offset = num_coefficients - 1;
WebRtcSpl_DownsampleFast(&expanded_signal[signal_offset],
expanded_length - signal_offset,
expanded_downsampled_, kExpandDownsampLength,
@ -288,10 +288,10 @@ void Merge::Downsample(const int16_t* input, int input_length,
decimation_factor, kCompensateDelay);
if (input_length <= length_limit) {
// Not quite long enough, so we have to cheat a bit.
int16_t temp_len = input_length - signal_offset;
size_t temp_len = input_length - signal_offset;
// TODO(hlundin): Should |downsamp_temp_len| be corrected for round-off
// errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor?
int16_t downsamp_temp_len = temp_len / decimation_factor;
size_t downsamp_temp_len = temp_len / decimation_factor;
WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len,
input_downsampled_, downsamp_temp_len,
filter_coefficients, num_coefficients,
@ -307,12 +307,12 @@ void Merge::Downsample(const int16_t* input, int input_length,
}
}
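The TODO above asks whether the division should round up; a sketch of the ceiling-division form it mentions, with made-up numbers:

    size_t temp_len = 37;
    size_t decimation_factor = 4;
    size_t floor_len = temp_len / decimation_factor;                  // 9
    size_t ceil_len = (temp_len + decimation_factor - 1)
                      / decimation_factor;                            // 10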
int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
int start_position, int input_length,
int expand_period) const {
size_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
size_t start_position, size_t input_length,
size_t expand_period) const {
// Calculate correlation without any normalization.
const int max_corr_length = kMaxCorrelationLength;
int stop_position_downsamp =
const size_t max_corr_length = kMaxCorrelationLength;
size_t stop_position_downsamp =
std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
int correlation_shift = 0;
if (expanded_max * input_max > 26843546) {
@ -325,8 +325,8 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
stop_position_downsamp, correlation_shift, 1);

// Normalize correlation to 14 bits and copy to a 16-bit array.
const int pad_length = static_cast<int>(expand_->overlap_length() - 1);
const int correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
const size_t pad_length = expand_->overlap_length() - 1;
const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
rtc::scoped_ptr<int16_t[]> correlation16(
new int16_t[correlation_buffer_size]);
memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
@ -342,21 +342,20 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
// (1) w16_bestIndex + input_length <
// timestamps_per_call_ + expand_->overlap_length();
// (2) w16_bestIndex + input_length < start_position.
int start_index = timestamps_per_call_ +
static_cast<int>(expand_->overlap_length());
size_t start_index = timestamps_per_call_ + expand_->overlap_length();
start_index = std::max(start_position, start_index);
start_index = (input_length > start_index) ? 0 : (start_index - input_length);
// Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.)
int start_index_downsamp = start_index / (fs_mult_ * 2);
size_t start_index_downsamp = start_index / (fs_mult_ * 2);

// Calculate a modified |stop_position_downsamp| to account for the increased
// start index |start_index_downsamp| and the effective array length.
int modified_stop_pos =
size_t modified_stop_pos =
std::min(stop_position_downsamp,
kMaxCorrelationLength + pad_length - start_index_downsamp);
int best_correlation_index;
size_t best_correlation_index;
int16_t best_correlation;
static const int kNumCorrelationCandidates = 1;
static const size_t kNumCorrelationCandidates = 1;
DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp],
modified_stop_pos, kNumCorrelationCandidates,
fs_mult_, &best_correlation_index,
@ -368,16 +367,16 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
// least 10ms + overlap . (This should never happen thanks to the above
// modification of peak-finding starting point.)
while (((best_correlation_index + input_length) <
static_cast<int>(timestamps_per_call_ + expand_->overlap_length())) ||
((best_correlation_index + input_length) < start_position)) {
(timestamps_per_call_ + expand_->overlap_length())) ||
((best_correlation_index + input_length) < start_position)) {
assert(false); // Should never happen.
best_correlation_index += expand_period; // Jump one lag ahead.
}
return best_correlation_index;
}

int Merge::RequiredFutureSamples() {
return static_cast<int>(fs_hz_ / 100 * num_channels_); // 10 ms.
size_t Merge::RequiredFutureSamples() {
return fs_hz_ / 100 * num_channels_; // 10 ms.
}

@ -46,11 +46,11 @@ class Merge {
// de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
// will be used to scale the audio, and is updated in the process. The array
// must have |num_channels_| elements.
virtual int Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output);
virtual size_t Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output);

virtual int RequiredFutureSamples();
virtual size_t RequiredFutureSamples();

protected:
const int fs_hz_;
@ -58,38 +58,38 @@ class Merge {

private:
static const int kMaxSampleRate = 48000;
static const int kExpandDownsampLength = 100;
static const int kInputDownsampLength = 40;
static const int kMaxCorrelationLength = 60;
static const size_t kExpandDownsampLength = 100;
static const size_t kInputDownsampLength = 40;
static const size_t kMaxCorrelationLength = 60;

// Calls |expand_| to get more expansion data to merge with. The data is
// written to |expanded_signal_|. Returns the length of the expanded data,
// while |expand_period| will be the number of samples in one expansion period
// (typically one pitch period). The value of |old_length| will be the number
// of samples that were taken from the |sync_buffer_|.
int GetExpandedSignal(int* old_length, int* expand_period);
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);

// Analyzes |input| and |expanded_signal| to find maximum values. Returns
// a muting factor (Q14) to be used on the new data.
int16_t SignalScaling(const int16_t* input, int input_length,
int16_t SignalScaling(const int16_t* input, size_t input_length,
const int16_t* expanded_signal,
int16_t* expanded_max, int16_t* input_max) const;

// Downsamples |input| (|input_length| samples) and |expanded_signal| to
// 4 kHz sample rate. The downsampled signals are written to
// |input_downsampled_| and |expanded_downsampled_|, respectively.
void Downsample(const int16_t* input, int input_length,
const int16_t* expanded_signal, int expanded_length);
void Downsample(const int16_t* input, size_t input_length,
const int16_t* expanded_signal, size_t expanded_length);

// Calculates cross-correlation between |input_downsampled_| and
// |expanded_downsampled_|, and finds the correlation maximum. The maximizing
// lag is returned.
int16_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
int start_position, int input_length,
int expand_period) const;
size_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
size_t start_position, size_t input_length,
size_t expand_period) const;

const int fs_mult_; // fs_hz_ / 8000.
const int timestamps_per_call_;
const size_t timestamps_per_call_;
Expand* expand_;
SyncBuffer* sync_buffer_;
int16_t expanded_downsampled_[kExpandDownsampLength];

@ -26,7 +26,7 @@ class MockAudioDecoder : public AudioDecoder {
Decode,
int(const uint8_t*, size_t, int, size_t, int16_t*, SpeechType*));
MOCK_CONST_METHOD0(HasDecodePlc, bool());
MOCK_METHOD2(DecodePlc, int(int, int16_t*));
MOCK_METHOD2(DecodePlc, size_t(size_t, int16_t*));
MOCK_METHOD0(Init, int());
MOCK_METHOD5(IncomingPacket, int(const uint8_t*, size_t, uint16_t, uint32_t,
uint32_t));

@ -25,8 +25,8 @@ class MockBufferLevelFilter : public BufferLevelFilter {
MOCK_METHOD0(Reset,
void());
MOCK_METHOD3(Update,
void(int buffer_size_packets, int time_stretched_samples,
int packet_len_samples));
void(size_t buffer_size_packets, int time_stretched_samples,
size_t packet_len_samples));
MOCK_METHOD1(SetTargetBufferLevel,
void(int target_buffer_level));
MOCK_CONST_METHOD0(filtered_current_level,

@ -19,7 +19,8 @@ namespace webrtc {

class MockDelayManager : public DelayManager {
public:
MockDelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector)
MockDelayManager(size_t max_packets_in_buffer,
DelayPeakDetector* peak_detector)
: DelayManager(max_packets_in_buffer, peak_detector) {}
virtual ~MockDelayManager() { Die(); }
MOCK_METHOD0(Die, void());

@ -26,7 +26,7 @@ class MockDtmfToneGenerator : public DtmfToneGenerator {
MOCK_METHOD0(Reset,
void());
MOCK_METHOD2(Generate,
int(int num_samples, AudioMultiVector* output));
int(size_t num_samples, AudioMultiVector* output));
MOCK_CONST_METHOD0(initialized,
bool());
};

@ -36,10 +36,9 @@ class ExternalPcm16B : public AudioDecoder {
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override {
int16_t ret = WebRtcPcm16b_Decode(
encoded, static_cast<int16_t>(encoded_len), decoded);
size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
*speech_type = ConvertSpeechType(1);
return ret;
return static_cast<int>(ret);
}
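A minimal sketch of the boundary pattern used in Decode above: keep size_t internally and narrow once, explicitly, where the AudioDecoder interface still returns int. The helper name is hypothetical.

    int DecodedSamplesAsInt(const uint8_t* encoded, size_t encoded_len,
                            int16_t* decoded) {
      size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
      // One explicit narrowing at the API edge instead of scattered casts.
      return static_cast<int>(ret);
    }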
size_t Channels() const override { return 1; }

@ -79,7 +78,7 @@ class MockExternalPcm16B : public ExternalPcm16B {
MOCK_CONST_METHOD0(HasDecodePlc,
bool());
MOCK_METHOD2(DecodePlc,
int(int num_frames, int16_t* decoded));
size_t(size_t num_frames, int16_t* decoded));
MOCK_METHOD0(Init,
int());
MOCK_METHOD5(IncomingPacket,

@ -41,7 +41,7 @@ class MockPacketBuffer : public PacketBuffer {
MOCK_CONST_METHOD0(NextRtpHeader,
const RTPHeader*());
MOCK_METHOD1(GetNextPacket,
Packet*(int* discard_count));
Packet*(size_t* discard_count));
MOCK_METHOD0(DiscardNextPacket,
int());
MOCK_METHOD2(DiscardOldPackets,
@ -49,7 +49,7 @@ class MockPacketBuffer : public PacketBuffer {
MOCK_METHOD1(DiscardAllOldPackets,
int(uint32_t timestamp_limit));
MOCK_CONST_METHOD0(NumPacketsInBuffer,
int());
size_t());
MOCK_METHOD1(IncrementWaitingTimes,
void(int));
MOCK_CONST_METHOD0(current_memory_bytes,

@ -169,7 +169,7 @@ class NetEqExternalDecoderUnitTest : public test::NetEqExternalDecoderTest {
class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
public ::testing::Test {
protected:
static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz.
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.

NetEqExternalVsInternalDecoderTest()
: NetEqExternalDecoderUnitTest(kDecoderPCM16Bswb32kHz,
@ -188,7 +188,7 @@ class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,

void GetAndVerifyOutput() override {
NetEqOutputType output_type;
int samples_per_channel;
size_t samples_per_channel;
int num_channels;
// Get audio from internal decoder instance.
EXPECT_EQ(NetEq::kOK,
@ -198,12 +198,13 @@ class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
&num_channels,
&output_type));
EXPECT_EQ(1, num_channels);
EXPECT_EQ(kOutputLengthMs * sample_rate_hz_ / 1000, samples_per_channel);
EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * sample_rate_hz_ / 1000),
samples_per_channel);

// Get audio from external decoder instance.
samples_per_channel = GetOutputAudio(kMaxBlockSize, output_, &output_type);

for (int i = 0; i < samples_per_channel; ++i) {
for (size_t i = 0; i < samples_per_channel; ++i) {
ASSERT_EQ(output_[i], output_internal_[i]) <<
"Diff in sample " << i << ".";
}
@ -240,7 +241,7 @@ TEST_F(NetEqExternalVsInternalDecoderTest, RunTest) {
class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
public ::testing::Test {
protected:
static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz.
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.

enum TestStates {
kInitialPhase,
@ -293,7 +294,7 @@ class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
}

void GetAndVerifyOutput() override {
int num_samples;
size_t num_samples;
NetEqOutputType output_type;
num_samples = GetOutputAudio(kMaxBlockSize, output_, &output_type);
UpdateState(output_type);
@ -303,7 +304,7 @@ class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
return;
}

for (int i = 0; i < num_samples; ++i) {
for (size_t i = 0; i < num_samples; ++i) {
if (output_[i] != 0)
return;
}

@ -16,6 +16,7 @@
#include <algorithm>

#include "webrtc/base/logging.h"
#include "webrtc/base/safe_conversions.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq/accelerate.h"
@ -104,7 +105,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
}
fs_hz_ = fs;
fs_mult_ = fs / 8000;
output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
decoder_frame_length_ = 3 * output_size_samples_;
WebRtcSpl_Init();
if (create_components) {
@ -154,7 +155,7 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
}

int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio,
int* samples_per_channel, int* num_channels,
size_t* samples_per_channel, int* num_channels,
NetEqOutputType* type) {
CriticalSectionScoped lock(crit_sect_.get());
LOG(LS_VERBOSE) << "GetAudio";
@ -305,10 +306,10 @@ NetEqPlayoutMode NetEqImpl::PlayoutMode() const {
int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
CriticalSectionScoped lock(crit_sect_.get());
assert(decoder_database_.get());
const int total_samples_in_buffers =
const size_t total_samples_in_buffers =
packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
decoder_frame_length_) +
static_cast<int>(sync_buffer_->FutureLength());
sync_buffer_->FutureLength();
assert(delay_manager_.get());
assert(decision_logic_.get());
stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
@ -603,7 +604,7 @@ int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
}

// Insert packets in buffer.
int temp_bufsize = packet_buffer_->NumPacketsInBuffer();
size_t temp_bufsize = packet_buffer_->NumPacketsInBuffer();
ret = packet_buffer_->InsertPacketList(
&packet_list,
*decoder_database_,
@ -665,7 +666,8 @@ int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
if ((temp_bufsize > 0) &&
(temp_bufsize != decision_logic_->packet_length_samples())) {
decision_logic_->set_packet_length_samples(temp_bufsize);
delay_manager_->SetPacketAudioLength((1000 * temp_bufsize) / fs_hz_);
delay_manager_->SetPacketAudioLength(
static_cast<int>((1000 * temp_bufsize) / fs_hz_));
}

// Update statistics.
@ -688,7 +690,7 @@ int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header,

int NetEqImpl::GetAudioInternal(size_t max_length,
int16_t* output,
int* samples_per_channel,
size_t* samples_per_channel,
int* num_channels) {
PacketList packet_list;
DtmfEvent dtmf_event;
@ -712,7 +714,7 @@ int NetEqImpl::GetAudioInternal(size_t max_length,
assert(vad_.get());
bool sid_frame_available =
(operation == kRfc3389Cng && !packet_list.empty());
vad_->Update(decoded_buffer_.get(), length, speech_type,
vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
sid_frame_available, fs_hz_);

algorithm_buffer_->Clear();
@ -811,12 +813,11 @@ int NetEqImpl::GetAudioInternal(size_t max_length,
LOG(LS_WARNING) << "Output array is too short. " << max_length << " < " <<
output_size_samples_ << " * " << sync_buffer_->Channels();
num_output_samples = max_length;
num_output_samples_per_channel = static_cast<int>(
max_length / sync_buffer_->Channels());
num_output_samples_per_channel = max_length / sync_buffer_->Channels();
}
const int samples_from_sync =
static_cast<int>(sync_buffer_->GetNextAudioInterleaved(
num_output_samples_per_channel, output));
const size_t samples_from_sync =
sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
output);
*num_channels = static_cast<int>(sync_buffer_->Channels());
LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" <<
" insert " << algorithm_buffer_->Size() << " samples, extract " <<
@ -922,7 +923,8 @@ int NetEqImpl::GetDecision(Operations* operation,
last_mode_ == kModePreemptiveExpandSuccess ||
last_mode_ == kModePreemptiveExpandLowEnergy) {
// Subtract (samples_left + output_size_samples_) from sampleMemory.
decision_logic_->AddSampleMemory(-(samples_left + output_size_samples_));
decision_logic_->AddSampleMemory(
-(samples_left + rtc::checked_cast<int>(output_size_samples_)));
}
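rtc::checked_cast comes from webrtc/base/safe_conversions.h, newly included above; a sketch of its intended use, assuming it verifies that the value survives the conversion:

    #include "webrtc/base/safe_conversions.h"
    size_t output_size = 480;
    // Behaves like static_cast<int>, but traps if the size_t value does
    // not fit in int, instead of silently wrapping.
    int size_as_int = rtc::checked_cast<int>(output_size);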
// Check if it is time to play a DTMF event.
@ -947,8 +949,10 @@ int NetEqImpl::GetDecision(Operations* operation,
// Check if we already have enough samples in the |sync_buffer_|. If so,
// change decision to normal, unless the decision was merge, accelerate, or
// preemptive expand.
if (samples_left >= output_size_samples_ && *operation != kMerge &&
*operation != kAccelerate && *operation != kFastAccelerate &&
if (samples_left >= rtc::checked_cast<int>(output_size_samples_) &&
*operation != kMerge &&
*operation != kAccelerate &&
*operation != kFastAccelerate &&
*operation != kPreemptiveExpand) {
*operation = kNormal;
return 0;
@ -996,10 +1000,10 @@ int NetEqImpl::GetDecision(Operations* operation,
stats_.ResetMcu();
}

int required_samples = output_size_samples_;
const int samples_10_ms = 80 * fs_mult_;
const int samples_20_ms = 2 * samples_10_ms;
const int samples_30_ms = 3 * samples_10_ms;
size_t required_samples = output_size_samples_;
const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
const size_t samples_20_ms = 2 * samples_10_ms;
const size_t samples_30_ms = 3 * samples_10_ms;

switch (*operation) {
case kExpand: {
@ -1028,17 +1032,17 @@ int NetEqImpl::GetDecision(Operations* operation,
case kAccelerate:
case kFastAccelerate: {
// In order to do an accelerate we need at least 30 ms of audio data.
if (samples_left >= samples_30_ms) {
if (samples_left >= static_cast<int>(samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
decision_logic_->set_sample_memory(samples_left);
decision_logic_->set_prev_time_scale(true);
return 0;
} else if (samples_left >= samples_10_ms &&
} else if (samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms) {
// Avoid decoding more data as it might overflow the playout buffer.
*operation = kNormal;
return 0;
} else if (samples_left < samples_20_ms &&
} else if (samples_left < static_cast<int>(samples_20_ms) &&
decoder_frame_length_ < samples_30_ms) {
// Build up decoded data by decoding at least 20 ms of audio data. Do
// not perform accelerate yet, but wait until we only need to do one
@ -1056,8 +1060,8 @@ int NetEqImpl::GetDecision(Operations* operation,
case kPreemptiveExpand: {
// In order to do a preemptive expand we need at least 30 ms of decoded
// audio data.
if ((samples_left >= samples_30_ms) ||
(samples_left >= samples_10_ms &&
if ((samples_left >= static_cast<int>(samples_30_ms)) ||
(samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
// Or, avoid decoding more data as it might overflow the playout buffer.
@ -1066,7 +1070,7 @@ int NetEqImpl::GetDecision(Operations* operation,
decision_logic_->set_prev_time_scale(true);
return 0;
}
if (samples_left < samples_20_ms &&
if (samples_left < static_cast<int>(samples_20_ms) &&
decoder_frame_length_ < samples_30_ms) {
// Build up decoded data by decoding at least 20 ms of audio data.
// Still try to perform preemptive expand.
@ -1123,7 +1127,7 @@ int NetEqImpl::GetDecision(Operations* operation,

if (*operation == kAccelerate || *operation == kFastAccelerate) {
// Check that we have enough data (30ms) to do accelerate.
if (extracted_samples + samples_left < samples_30_ms) {
if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
// TODO(hlundin): Write test for this.
// Not enough, do normal operation instead.
*operation = kNormal;
@ -1274,7 +1278,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
memset(&decoded_buffer_[*decoded_length], 0,
decoder_frame_length_ * decoder->Channels() *
sizeof(decoded_buffer_[0]));
decode_length = decoder_frame_length_;
decode_length = rtc::checked_cast<int>(decoder_frame_length_);
} else if (!packet->primary) {
// This is a redundant payload; call the special decoder method.
LOG(LS_VERBOSE) << "Decoding packet (redundant):" <<
@ -1307,7 +1311,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation,
*decoded_length += decode_length;
// Update |decoder_frame_length_| with number of samples per channel.
decoder_frame_length_ =
decode_length / static_cast<int>(decoder->Channels());
static_cast<size_t>(decode_length) / decoder->Channels();
LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples ("
<< decoder->Channels() << " channel(s) -> "
<< decoder_frame_length_ << " samples per channel)";
@ -1366,11 +1370,11 @@ void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length,
AudioDecoder::SpeechType speech_type, bool play_dtmf) {
assert(mute_factor_array_.get());
assert(merge_.get());
int new_length = merge_->Process(decoded_buffer, decoded_length,
mute_factor_array_.get(),
algorithm_buffer_.get());
int expand_length_correction = new_length -
static_cast<int>(decoded_length / algorithm_buffer_->Channels());
size_t new_length = merge_->Process(decoded_buffer, decoded_length,
mute_factor_array_.get(),
algorithm_buffer_.get());
size_t expand_length_correction = new_length -
decoded_length / algorithm_buffer_->Channels();

// Update in-call and post-call statistics.
if (expand_->MuteFactor(0) == 0) {
@ -1394,10 +1398,10 @@ void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length,

int NetEqImpl::DoExpand(bool play_dtmf) {
while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
static_cast<size_t>(output_size_samples_)) {
output_size_samples_) {
algorithm_buffer_->Clear();
int return_value = expand_->Process(algorithm_buffer_.get());
int length = static_cast<int>(algorithm_buffer_->Size());
size_t length = algorithm_buffer_->Size();

// Update in-call and post-call statistics.
if (expand_->MuteFactor(0) == 0) {
@ -1428,7 +1432,8 @@ int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
AudioDecoder::SpeechType speech_type,
bool play_dtmf,
bool fast_accelerate) {
const size_t required_samples = 240 * fs_mult_; // Must have 30 ms.
const size_t required_samples =
static_cast<size_t>(240 * fs_mult_); // Must have 30 ms.
size_t borrowed_samples_per_channel = 0;
size_t num_channels = algorithm_buffer_->Channels();
size_t decoded_length_per_channel = decoded_length / num_channels;
@ -1444,7 +1449,7 @@ int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
decoded_length = required_samples * num_channels;
}

int16_t samples_removed;
size_t samples_removed;
Accelerate::ReturnCodes return_code =
accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
algorithm_buffer_.get(), &samples_removed);
@ -1501,20 +1506,20 @@ int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) {
const size_t required_samples = 240 * fs_mult_; // Must have 30 ms.
const size_t required_samples =
static_cast<size_t>(240 * fs_mult_); // Must have 30 ms.
size_t num_channels = algorithm_buffer_->Channels();
int borrowed_samples_per_channel = 0;
int old_borrowed_samples_per_channel = 0;
size_t borrowed_samples_per_channel = 0;
size_t old_borrowed_samples_per_channel = 0;
size_t decoded_length_per_channel = decoded_length / num_channels;
if (decoded_length_per_channel < required_samples) {
// Must move data from the |sync_buffer_| in order to get 30 ms.
borrowed_samples_per_channel = static_cast<int>(required_samples -
decoded_length_per_channel);
borrowed_samples_per_channel =
required_samples - decoded_length_per_channel;
// Calculate how many of these were already played out.
const int future_length = static_cast<int>(sync_buffer_->FutureLength());
old_borrowed_samples_per_channel =
(borrowed_samples_per_channel > future_length) ?
(borrowed_samples_per_channel - future_length) : 0;
(borrowed_samples_per_channel > sync_buffer_->FutureLength()) ?
(borrowed_samples_per_channel - sync_buffer_->FutureLength()) : 0;
memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
decoded_buffer,
sizeof(int16_t) * decoded_length);
@ -1523,9 +1528,9 @@ int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
decoded_length = required_samples * num_channels;
}

int16_t samples_added;
size_t samples_added;
PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
decoded_buffer, static_cast<int>(decoded_length),
decoded_buffer, decoded_length,
old_borrowed_samples_per_channel,
algorithm_buffer_.get(), &samples_added);
stats_.PreemptiveExpandedSamples(samples_added);
@ -1719,17 +1724,14 @@ int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) {

void NetEqImpl::DoAlternativePlc(bool increase_timestamp) {
AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
int length;
size_t length;
if (decoder && decoder->HasDecodePlc()) {
// Use the decoder's packet-loss concealment.
// TODO(hlundin): Will probably need a longer buffer for multi-channel.
int16_t decoded_buffer[kMaxFrameSize];
length = decoder->DecodePlc(1, decoded_buffer);
if (length > 0) {
if (length > 0)
algorithm_buffer_->PushBackInterleaved(decoded_buffer, length);
} else {
length = 0;
}
} else {
// Do simple zero-stuffing.
length = output_size_samples_;
@ -1746,14 +1748,14 @@ void NetEqImpl::DoAlternativePlc(bool increase_timestamp) {
int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
int16_t* output) const {
size_t out_index = 0;
int overdub_length = output_size_samples_; // Default value.
size_t overdub_length = output_size_samples_; // Default value.

if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) {
// Special operation for transition from "DTMF only" to "DTMF overdub".
out_index = std::min(
sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
static_cast<size_t>(output_size_samples_));
overdub_length = output_size_samples_ - static_cast<int>(out_index);
output_size_samples_);
overdub_length = output_size_samples_ - out_index;
}

AudioMultiVector dtmf_output(num_channels);
@ -1765,13 +1767,14 @@ int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
if (dtmf_return_value == 0) {
dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length,
&dtmf_output);
assert((size_t) overdub_length == dtmf_output.Size());
assert(overdub_length == dtmf_output.Size());
}
dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
return dtmf_return_value < 0 ? dtmf_return_value : 0;
}

int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
int NetEqImpl::ExtractPackets(size_t required_samples,
PacketList* packet_list) {
bool first_packet = true;
uint8_t prev_payload_type = 0;
uint32_t prev_timestamp = 0;
@ -1790,7 +1793,7 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
// Packet extraction loop.
do {
timestamp_ = header->timestamp;
int discard_count = 0;
size_t discard_count = 0;
Packet* packet = packet_buffer_->GetNextPacket(&discard_count);
// |header| may be invalid after the |packet_buffer_| operation.
header = NULL;
@ -1819,7 +1822,7 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
packet->header.payloadType);
if (decoder) {
if (packet->sync_packet) {
packet_duration = decoder_frame_length_;
packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
} else {
if (packet->primary) {
packet_duration = decoder->PacketDuration(packet->payload,
@ -1838,7 +1841,7 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
if (packet_duration <= 0) {
// Decoder did not return a packet duration. Assume that the packet
// contains the same number of samples as the previous one.
packet_duration = decoder_frame_length_;
packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
}
extracted_samples = packet->header.timestamp - first_timestamp +
packet_duration;
@ -1848,7 +1851,7 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
next_packet_available = false;
if (header && prev_payload_type == header->payloadType) {
int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number;
int32_t ts_diff = header->timestamp - prev_timestamp;
size_t ts_diff = header->timestamp - prev_timestamp;
if (seq_no_diff == 1 ||
(seq_no_diff == 0 && ts_diff == decoder_frame_length_)) {
// The next sequence number is available, or the next part of a packet
@ -1857,7 +1860,8 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
}
prev_sequence_number = header->sequenceNumber;
}
} while (extracted_samples < required_samples && next_packet_available);
} while (extracted_samples < rtc::checked_cast<int>(required_samples) &&
next_packet_available);

if (extracted_samples > 0) {
// Delete old packets only when we are going to decode something. Otherwise,
@ -1886,7 +1890,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {

fs_hz_ = fs_hz;
fs_mult_ = fs_hz / 8000;
output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms.

last_mode_ = kModeNormal;
@ -1931,9 +1935,7 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
accelerate_.reset(
accelerate_factory_->Create(fs_hz, channels, *background_noise_));
preemptive_expand_.reset(preemptive_expand_factory_->Create(
fs_hz, channels,
*background_noise_,
static_cast<int>(expand_->overlap_length())));
fs_hz, channels, *background_noise_, expand_->overlap_length()));

// Delete ComfortNoise object and create a new one.
comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),

@ -106,7 +106,7 @@ class NetEqImpl : public webrtc::NetEq {
// Returns kOK on success, or kFail in case of an error.
int GetAudio(size_t max_length,
int16_t* output_audio,
int* samples_per_channel,
size_t* samples_per_channel,
int* num_channels,
NetEqOutputType* type) override;

@ -203,9 +203,9 @@ class NetEqImpl : public webrtc::NetEq {

protected:
static const int kOutputSizeMs = 10;
static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz.
static const size_t kMaxFrameSize = 2880; // 60 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize.
static const int kSyncBufferSize = 2 * kMaxFrameSize;
static const size_t kSyncBufferSize = 2 * kMaxFrameSize;

// Inserts a new packet into NetEq. This is used by the InsertPacket method
// above. Returns 0 on success, otherwise an error code.
@ -225,7 +225,7 @@ class NetEqImpl : public webrtc::NetEq {
// Returns 0 on success, otherwise an error code.
int GetAudioInternal(size_t max_length,
int16_t* output,
int* samples_per_channel,
size_t* samples_per_channel,
int* num_channels) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

// Provides a decision to the GetAudioInternal method. The decision what to
@ -318,7 +318,7 @@ class NetEqImpl : public webrtc::NetEq {
// |required_samples| samples. The packets are inserted into |packet_list|.
// Returns the number of samples that the packets in the list will produce, or
// -1 in case of an error.
int ExtractPackets(int required_samples, PacketList* packet_list)
int ExtractPackets(size_t required_samples, PacketList* packet_list)
EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

// Resets various variables and objects to new values based on the sample rate
@ -375,8 +375,8 @@ class NetEqImpl : public webrtc::NetEq {
StatisticsCalculator stats_ GUARDED_BY(crit_sect_);
int fs_hz_ GUARDED_BY(crit_sect_);
int fs_mult_ GUARDED_BY(crit_sect_);
int output_size_samples_ GUARDED_BY(crit_sect_);
int decoder_frame_length_ GUARDED_BY(crit_sect_);
size_t output_size_samples_ GUARDED_BY(crit_sect_);
size_t decoder_frame_length_ GUARDED_BY(crit_sect_);
Modes last_mode_ GUARDED_BY(crit_sect_);
rtc::scoped_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_);
size_t decoded_buffer_length_ GUARDED_BY(crit_sect_);

@ -384,7 +384,7 @@ TEST_F(NetEqImplTest, InsertPacketsUntilBufferIsFull) {
neteq_->RegisterPayloadType(kDecoderPCM16B, kPayloadType));

// Insert packets. The buffer should not flush.
for (int i = 1; i <= config_.max_packets_in_buffer; ++i) {
for (size_t i = 1; i <= config_.max_packets_in_buffer; ++i) {
EXPECT_EQ(NetEq::kOK,
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
@ -398,7 +398,7 @@ TEST_F(NetEqImplTest, InsertPacketsUntilBufferIsFull) {
EXPECT_EQ(NetEq::kOK,
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
EXPECT_EQ(1, packet_buffer_->NumPacketsInBuffer());
EXPECT_EQ(1u, packet_buffer_->NumPacketsInBuffer());
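A brief note: EXPECT_EQ compares its arguments with their own types, so matching a size_t return value against a plain 1 mixes signed and unsigned; the 1u literal, or an explicit cast, keeps the comparison unsigned. Sketch with a hypothetical size_t value:

    size_t num_packets = 1;
    EXPECT_EQ(1u, num_packets);                      // Unsigned literal.
    EXPECT_EQ(static_cast<size_t>(1), num_packets);  // Equivalent cast.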
|
||||
const RTPHeader* test_header = packet_buffer_->NextRtpHeader();
|
||||
EXPECT_EQ(rtp_header.header.timestamp, test_header->timestamp);
|
||||
EXPECT_EQ(rtp_header.header.sequenceNumber, test_header->sequenceNumber);
|
||||
@ -413,7 +413,8 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
|
||||
const uint8_t kPayloadType = 17; // Just an arbitrary number.
|
||||
const uint32_t kReceiveTime = 17; // Value doesn't matter for this test.
|
||||
const int kSampleRateHz = 8000;
|
||||
const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000; // 10 ms.
|
||||
const size_t kPayloadLengthSamples =
|
||||
static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms.
|
||||
const size_t kPayloadLengthBytes = kPayloadLengthSamples;
|
||||
uint8_t payload[kPayloadLengthBytes] = {0};
|
||||
WebRtcRTPHeader rtp_header;
|
||||
@ -466,9 +467,9 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
|
||||
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
|
||||
|
||||
// Pull audio once.
|
||||
const int kMaxOutputSize = 10 * kSampleRateHz / 1000;
|
||||
const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
|
||||
int16_t output[kMaxOutputSize];
|
||||
int samples_per_channel;
|
||||
size_t samples_per_channel;
|
||||
int num_channels;
|
||||
NetEqOutputType type;
|
||||
EXPECT_EQ(
|
||||
@ -480,7 +481,8 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
|
||||
EXPECT_EQ(kOutputNormal, type);
|
||||
|
||||
// Start with a simple check that the fake decoder is behaving as expected.
|
||||
EXPECT_EQ(kPayloadLengthSamples, decoder_.next_value() - 1);
|
||||
EXPECT_EQ(kPayloadLengthSamples,
|
||||
static_cast<size_t>(decoder_.next_value() - 1));
|
||||
|
||||
// The value of the last of the output samples is the same as the number of
|
||||
// samples played from the decoded packet. Thus, this number + the RTP
|
||||
@ -500,7 +502,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
|
||||
// Check that the number of samples still to play from the sync buffer add
|
||||
// up with what was already played out.
|
||||
EXPECT_EQ(kPayloadLengthSamples - output[samples_per_channel - 1],
|
||||
static_cast<int>(sync_buffer->FutureLength()));
|
||||
sync_buffer->FutureLength());
|
||||
}
|
||||
|
||||
TEST_F(NetEqImplTest, ReorderedPacket) {
|
||||
@ -510,7 +512,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
|
||||
const uint8_t kPayloadType = 17; // Just an arbitrary number.
|
||||
const uint32_t kReceiveTime = 17; // Value doesn't matter for this test.
|
||||
const int kSampleRateHz = 8000;
|
||||
const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000; // 10 ms.
|
||||
const size_t kPayloadLengthSamples =
|
||||
static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms.
|
||||
const size_t kPayloadLengthBytes = kPayloadLengthSamples;
|
||||
uint8_t payload[kPayloadLengthBytes] = {0};
|
||||
WebRtcRTPHeader rtp_header;
|
||||
@ -544,9 +547,9 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
|
||||
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));
|
||||
|
||||
// Pull audio once.
|
||||
const int kMaxOutputSize = 10 * kSampleRateHz / 1000;
|
||||
const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
|
||||
int16_t output[kMaxOutputSize];
|
||||
int samples_per_channel;
|
||||
size_t samples_per_channel;
|
||||
int num_channels;
|
||||
NetEqOutputType type;
|
||||
EXPECT_EQ(
|
||||
@ -606,7 +609,8 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
|
||||
const uint8_t kPayloadType = 17; // Just an arbitrary number.
|
||||
const uint32_t kReceiveTime = 17; // Value doesn't matter for this test.
|
||||
const int kSampleRateHz = 8000;
|
||||
const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000; // 10 ms.
|
||||
const size_t kPayloadLengthSamples =
|
||||
static_cast<size_t>(10 * kSampleRateHz / 1000); // 10 ms.
|
||||
const size_t kPayloadLengthBytes = kPayloadLengthSamples;
|
||||
uint8_t payload[kPayloadLengthBytes] = {0};
WebRtcRTPHeader rtp_header;
@ -623,9 +627,9 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
EXPECT_EQ(NetEq::kUnknownRtpPayloadType, neteq_->LastError());

// Pull audio once.
const int kMaxOutputSize = 10 * kSampleRateHz / 1000;
const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
int16_t output[kMaxOutputSize];
int samples_per_channel;
size_t samples_per_channel;
int num_channels;
NetEqOutputType type;
EXPECT_EQ(NetEq::kOK,
@ -641,7 +645,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
neteq_->RegisterPayloadType(kDecoderPCM16B, kPayloadType));

// Insert 10 packets.
for (int i = 0; i < 10; ++i) {
for (size_t i = 0; i < 10; ++i) {
rtp_header.header.sequenceNumber++;
rtp_header.header.timestamp += kPayloadLengthSamples;
EXPECT_EQ(NetEq::kOK,
@ -651,7 +655,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
}

// Pull audio repeatedly and make sure we get normal output, that is not PLC.
for (int i = 0; i < 3; ++i) {
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(NetEq::kOK,
neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel,
&num_channels, &type));
@ -672,8 +676,9 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
const uint8_t kPayloadType = 17;  // Just an arbitrary number.
const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
const int kSampleRateKhz = 48;
const int kPayloadLengthSamples = 20 * kSampleRateKhz;  // 20 ms.
const int kPayloadLengthBytes = 10;
const size_t kPayloadLengthSamples =
static_cast<size_t>(20 * kSampleRateKhz);  // 20 ms.
const size_t kPayloadLengthBytes = 10;
uint8_t payload[kPayloadLengthBytes] = {0};
int16_t dummy_output[kPayloadLengthSamples] = {0};

@ -736,9 +741,9 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));

const int kMaxOutputSize = 10 * kSampleRateKhz;
const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateKhz);
int16_t output[kMaxOutputSize];
int samples_per_channel;
size_t samples_per_channel;
int num_channels;
uint32_t timestamp;
uint32_t last_timestamp;
@ -762,7 +767,7 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
&num_channels, &type));
EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&last_timestamp));

for (int i = 1; i < 6; ++i) {
for (size_t i = 1; i < 6; ++i) {
ASSERT_EQ(kMaxOutputSize, samples_per_channel);
EXPECT_EQ(1, num_channels);
EXPECT_EQ(expected_type[i - 1], type);
@ -783,7 +788,7 @@ TEST_F(NetEqImplTest, CodecInternalCng) {
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));

for (int i = 6; i < 8; ++i) {
for (size_t i = 6; i < 8; ++i) {
ASSERT_EQ(kMaxOutputSize, samples_per_channel);
EXPECT_EQ(1, num_channels);
EXPECT_EQ(expected_type[i - 1], type);
@ -811,7 +816,8 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
const uint32_t kReceiveTime = 17;  // Value doesn't matter for this test.
const int kSampleRateHz = 8000;

const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000;  // 10 ms.
const size_t kPayloadLengthSamples =
static_cast<size_t>(10 * kSampleRateHz / 1000);  // 10 ms.
const size_t kPayloadLengthBytes = 1;
uint8_t payload[kPayloadLengthBytes] = {0};
int16_t dummy_output[kPayloadLengthSamples * kChannels] = {0};
@ -852,7 +858,8 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
dummy_output +
kPayloadLengthSamples * kChannels),
SetArgPointee<4>(AudioDecoder::kSpeech),
Return(kPayloadLengthSamples * kChannels)));
Return(static_cast<int>(
kPayloadLengthSamples * kChannels))));

EXPECT_CALL(decoder_, PacketDuration(Pointee(kSecondPayloadValue),
kPayloadLengthBytes))
@ -879,9 +886,10 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
neteq_->InsertPacket(
rtp_header, payload, kPayloadLengthBytes, kReceiveTime));

const int kMaxOutputSize = 10 * kSampleRateHz / 1000 * kChannels;
const size_t kMaxOutputSize =
static_cast<size_t>(10 * kSampleRateHz / 1000 * kChannels);
int16_t output[kMaxOutputSize];
int samples_per_channel;
size_t samples_per_channel;
int num_channels;
NetEqOutputType type;

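Note on the pattern above: sample rates stay int while sample counts become size_t, with one explicit cast at the point where a rate-derived value becomes a length. A minimal sketch of that convention (names are illustrative, not from the patch):

#include <cstddef>
#include <cstdint>

void SketchOutputSizing() {
  const int kSampleRateHz = 8000;  // A rate, not a length: stays int.
  // A 10 ms block is a sample count, so it becomes size_t; the cast is
  // explicit to keep signed/unsigned conversion warnings useful.
  const size_t kMaxOutputSize = static_cast<size_t>(10 * kSampleRateHz / 1000);
  int16_t output[kMaxOutputSize];  // Buffer sized by the size_t constant.
  size_t samples_per_channel = 0;  // Length out-params follow suit.
  for (size_t i = 0; i < kMaxOutputSize; ++i)
    output[i] = 0;  // Loop index type matches the size_t bound.
  (void)samples_per_channel;
}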
@ -43,7 +43,7 @@ struct TestParameters {
class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
protected:
static const int kTimeStepMs = 10;
static const int kMaxBlockSize = 480;  // 10 ms @ 48 kHz.
static const size_t kMaxBlockSize = 480;  // 10 ms @ 48 kHz.
static const uint8_t kPayloadTypeMono = 95;
static const uint8_t kPayloadTypeMulti = 96;

@ -52,7 +52,8 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
sample_rate_hz_(GetParam().sample_rate),
samples_per_ms_(sample_rate_hz_ / 1000),
frame_size_ms_(GetParam().frame_size),
frame_size_samples_(frame_size_ms_ * samples_per_ms_),
frame_size_samples_(
static_cast<size_t>(frame_size_ms_ * samples_per_ms_)),
output_size_samples_(10 * samples_per_ms_),
rtp_generator_mono_(samples_per_ms_),
rtp_generator_(samples_per_ms_),
@ -212,7 +213,7 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
}
NetEqOutputType output_type;
// Get audio from mono instance.
int samples_per_channel;
size_t samples_per_channel;
int num_channels;
EXPECT_EQ(NetEq::kOK,
neteq_mono_->GetAudio(kMaxBlockSize, output_,
@ -242,8 +243,8 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
const int sample_rate_hz_;
const int samples_per_ms_;
const int frame_size_ms_;
const int frame_size_samples_;
const int output_size_samples_;
const size_t frame_size_samples_;
const size_t output_size_samples_;
NetEq* neteq_mono_;
NetEq* neteq_;
test::RtpGenerator rtp_generator_mono_;
@ -256,8 +257,8 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
int16_t* output_multi_channel_;
WebRtcRTPHeader rtp_header_mono_;
WebRtcRTPHeader rtp_header_;
int payload_size_bytes_;
int multi_payload_size_bytes_;
size_t payload_size_bytes_;
size_t multi_payload_size_bytes_;
int last_send_time_;
int last_arrival_time_;
rtc::scoped_ptr<test::InputAudioFile> input_file_;

@ -37,16 +37,16 @@ DEFINE_bool(gen_ref, false, "Generate reference files.");

namespace webrtc {

static bool IsAllZero(const int16_t* buf, int buf_length) {
static bool IsAllZero(const int16_t* buf, size_t buf_length) {
bool all_zero = true;
for (int n = 0; n < buf_length && all_zero; ++n)
for (size_t n = 0; n < buf_length && all_zero; ++n)
all_zero = buf[n] == 0;
return all_zero;
}

static bool IsAllNonZero(const int16_t* buf, int buf_length) {
static bool IsAllNonZero(const int16_t* buf, size_t buf_length) {
bool all_non_zero = true;
for (int n = 0; n < buf_length && all_non_zero; ++n)
for (size_t n = 0; n < buf_length && all_non_zero; ++n)
all_non_zero = buf[n] != 0;
return all_non_zero;
}
@ -172,7 +172,8 @@ void RefFiles::ReadFromFileAndCompare(
ASSERT_EQ(stats.preemptive_rate, ref_stats.preemptive_rate);
ASSERT_EQ(stats.accelerate_rate, ref_stats.accelerate_rate);
ASSERT_EQ(stats.clockdrift_ppm, ref_stats.clockdrift_ppm);
ASSERT_EQ(stats.added_zero_samples, ref_stats.added_zero_samples);
ASSERT_EQ(stats.added_zero_samples,
static_cast<size_t>(ref_stats.added_zero_samples));
ASSERT_EQ(stats.secondary_decoded_rate, 0);
ASSERT_LE(stats.speech_expand_rate, ref_stats.expand_rate);
}
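The loops in IsAllZero() and IsAllNonZero() translate to size_t directly because they run forward. A reverse loop needs more care, since `i >= 0` is always true for an unsigned index; a sketch of the safe idiom (hypothetical helper, not part of the patch):

#include <cstddef>
#include <cstdint>

static bool IsAllZeroReversed(const int16_t* buf, size_t buf_length) {
  for (size_t i = buf_length; i > 0; --i) {  // Test before decrementing...
    if (buf[i - 1] != 0)                     // ...and index with i - 1.
      return false;
  }
  return true;
}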
@ -220,9 +221,9 @@ class NetEqDecodingTest : public ::testing::Test {
// NetEQ must be polled for data once every 10 ms. Thus, neither of the
// constants below can be changed.
static const int kTimeStepMs = 10;
static const int kBlockSize8kHz = kTimeStepMs * 8;
static const int kBlockSize16kHz = kTimeStepMs * 16;
static const int kBlockSize32kHz = kTimeStepMs * 32;
static const size_t kBlockSize8kHz = kTimeStepMs * 8;
static const size_t kBlockSize16kHz = kTimeStepMs * 16;
static const size_t kBlockSize32kHz = kTimeStepMs * 32;
static const size_t kMaxBlockSize = kBlockSize32kHz;
static const int kInitSampleRateHz = 8000;

@ -232,7 +233,7 @@ class NetEqDecodingTest : public ::testing::Test {
void SelectDecoders(NetEqDecoder* used_codec);
void LoadDecoders();
void OpenInputFile(const std::string &rtp_file);
void Process(int* out_len);
void Process(size_t* out_len);
void DecodeAndCompare(const std::string& rtp_file,
const std::string& ref_file,
const std::string& stat_ref_file,
@ -272,9 +273,9 @@ class NetEqDecodingTest : public ::testing::Test {

// Allocating the static const so that it can be passed by reference.
const int NetEqDecodingTest::kTimeStepMs;
const int NetEqDecodingTest::kBlockSize8kHz;
const int NetEqDecodingTest::kBlockSize16kHz;
const int NetEqDecodingTest::kBlockSize32kHz;
const size_t NetEqDecodingTest::kBlockSize8kHz;
const size_t NetEqDecodingTest::kBlockSize16kHz;
const size_t NetEqDecodingTest::kBlockSize32kHz;
const size_t NetEqDecodingTest::kMaxBlockSize;
const int NetEqDecodingTest::kInitSampleRateHz;

@ -334,7 +335,7 @@ void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) {
rtp_source_.reset(test::RtpFileSource::Create(rtp_file));
}

void NetEqDecodingTest::Process(int* out_len) {
void NetEqDecodingTest::Process(size_t* out_len) {
// Check if time to receive.
while (packet_ && sim_clock_ >= packet_->time_ms()) {
if (packet_->payload_length_bytes() > 0) {
@ -358,7 +359,7 @@ void NetEqDecodingTest::Process(int* out_len) {
ASSERT_TRUE((*out_len == kBlockSize8kHz) ||
(*out_len == kBlockSize16kHz) ||
(*out_len == kBlockSize32kHz));
output_sample_rate_ = *out_len / 10 * 1000;
output_sample_rate_ = static_cast<int>(*out_len / 10 * 1000);

// Increase time.
sim_clock_ += kTimeStepMs;
@ -394,7 +395,7 @@ void NetEqDecodingTest::DecodeAndCompare(const std::string& rtp_file,
std::ostringstream ss;
ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
int out_len = 0;
size_t out_len = 0;
ASSERT_NO_FATAL_FAILURE(Process(&out_len));
ASSERT_NO_FATAL_FAILURE(ref_files.ProcessReference(out_data_, out_len));

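The cast in Process() above also documents the arithmetic: *out_len is the number of samples in one 10 ms block, so dividing by 10 gives samples per millisecond and multiplying by 1000 gives Hz. A worked sketch of the same computation (standalone, assuming 10 ms blocks):

#include <cstddef>

int SampleRateFromBlockLength(size_t out_len) {
  // 80 -> 8000 Hz, 160 -> 16000 Hz, 320 -> 32000 Hz.
  return static_cast<int>(out_len / 10 * 1000);
}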
@ -498,7 +499,7 @@ TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
}
// Pull out all data.
for (size_t i = 0; i < num_frames; ++i) {
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
@ -536,7 +537,7 @@ TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
rtp_info,
reinterpret_cast<uint8_t*>(payload),
kPayloadBytes, 0));
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
@ -566,7 +567,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
}

// Pull out data once.
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
@ -597,7 +598,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
}

// Pull out data once.
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
@ -622,7 +623,7 @@ void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
const size_t kPayloadBytes = kSamples * 2;
double next_input_time_ms = 0.0;
double t_ms;
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;

@ -854,7 +855,7 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(DecoderError)) {
out_data_[i] = 1;
}
int num_channels;
int samples_per_channel;
size_t samples_per_channel;
EXPECT_EQ(NetEq::kFail,
neteq_->GetAudio(kMaxBlockSize, out_data_,
&samples_per_channel, &num_channels, &type));
@ -887,7 +888,7 @@ TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
out_data_[i] = 1;
}
int num_channels;
int samples_per_channel;
size_t samples_per_channel;
EXPECT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_,
&samples_per_channel,
&num_channels, &type));
@ -908,7 +909,7 @@ class NetEqBgnTest : public NetEqDecodingTest {
bool should_be_faded) = 0;

void CheckBgn(int sampling_rate_hz) {
int16_t expected_samples_per_channel = 0;
size_t expected_samples_per_channel = 0;
uint8_t payload_type = 0xFF;  // Invalid.
if (sampling_rate_hz == 8000) {
expected_samples_per_channel = kBlockSize8kHz;
@ -932,7 +933,7 @@ class NetEqBgnTest : public NetEqDecodingTest {
ASSERT_TRUE(input.Init(
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
10 * sampling_rate_hz,  // Max 10 seconds loop length.
static_cast<size_t>(expected_samples_per_channel)));
expected_samples_per_channel));

// Payload of 10 ms of PCM16 32 kHz.
uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
@ -941,19 +942,18 @@ class NetEqBgnTest : public NetEqDecodingTest {
rtp_info.header.payloadType = payload_type;

int number_channels = 0;
int samples_per_channel = 0;
size_t samples_per_channel = 0;

uint32_t receive_timestamp = 0;
for (int n = 0; n < 10; ++n) {  // Insert few packets and get audio.
int16_t enc_len_bytes = WebRtcPcm16b_Encode(
size_t enc_len_bytes = WebRtcPcm16b_Encode(
input.GetNextBlock(), expected_samples_per_channel, payload);
ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);

number_channels = 0;
samples_per_channel = 0;
ASSERT_EQ(0,
neteq_->InsertPacket(rtp_info, payload,
static_cast<size_t>(enc_len_bytes),
neteq_->InsertPacket(rtp_info, payload, enc_len_bytes,
receive_timestamp));
ASSERT_EQ(0,
neteq_->GetAudio(kBlockSize32kHz,
@ -1009,7 +1009,7 @@ class NetEqBgnTest : public NetEqDecodingTest {
if (type == kOutputPLCtoCNG) {
plc_to_cng = true;
double sum_squared = 0;
for (int k = 0; k < number_channels * samples_per_channel; ++k)
for (size_t k = 0; k < number_channels * samples_per_channel; ++k)
sum_squared += output[k] * output[k];
TestCondition(sum_squared, n > kFadingThreshold);
} else {
@ -1168,7 +1168,7 @@ TEST_F(NetEqDecodingTest, SyncPacketDecode) {
// actual decoded values.
NetEqOutputType output_type;
int num_channels;
int samples_per_channel;
size_t samples_per_channel;
uint32_t receive_timestamp = 0;
for (int n = 0; n < 100; ++n) {
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
@ -1246,7 +1246,7 @@ TEST_F(NetEqDecodingTest, SyncPacketBufferSizeAndOverridenByNetworkPackets) {
// actual decoded values.
NetEqOutputType output_type;
int num_channels;
int samples_per_channel;
size_t samples_per_channel;
uint32_t receive_timestamp = 0;
int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
for (int n = 0; n < algorithmic_frame_delay; ++n) {
@ -1315,7 +1315,7 @@ void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
double next_input_time_ms = 0.0;
int16_t decoded[kBlockSize16kHz];
int num_channels;
int samples_per_channel;
size_t samples_per_channel;
NetEqOutputType output_type;
uint32_t receive_timestamp = 0;

@ -1418,7 +1418,7 @@ void NetEqDecodingTest::DuplicateCng() {
algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
// Insert three speech packets. Three are needed to get the frame length
// correct.
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
uint8_t payload[kPayloadBytes] = {0};
@ -1515,7 +1515,7 @@ TEST_F(NetEqDecodingTest, CngFirst) {
timestamp += kCngPeriodSamples;

// Pull audio once and make sure CNG is played.
int out_len;
size_t out_len;
int num_channels;
NetEqOutputType type;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,

@ -45,12 +45,12 @@ int Normal::Process(const int16_t* input,
output->PushBackInterleaved(input, length);
int16_t* signal = &(*output)[0][0];

const unsigned fs_mult = fs_hz_ / 8000;
const int fs_mult = fs_hz_ / 8000;
assert(fs_mult > 0);
// fs_shift = log2(fs_mult), rounded down.
// Note that |fs_shift| is not "exact" for 48 kHz.
// TODO(hlundin): Investigate this further.
const int fs_shift = 30 - WebRtcSpl_NormW32(static_cast<int32_t>(fs_mult));
const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);

// Check if last RecOut call resulted in an Expand. If so, we have to take
// care of some cross-fading and unmuting.
@ -73,11 +73,11 @@ int Normal::Process(const int16_t* input,
int16_t* signal = &(*output)[channel_ix][0];
size_t length_per_channel = length / output->Channels();
// Find largest absolute value in new data.
int16_t decoded_max = WebRtcSpl_MaxAbsValueW16(
signal, static_cast<int>(length_per_channel));
int16_t decoded_max =
WebRtcSpl_MaxAbsValueW16(signal, length_per_channel);
// Adjust muting factor if needed (to BGN level).
int energy_length = std::min(static_cast<int>(fs_mult * 64),
static_cast<int>(length_per_channel));
size_t energy_length =
std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
int scaling = 6 + fs_shift
- WebRtcSpl_NormW32(decoded_max * decoded_max);
scaling = std::max(scaling, 0);  // |scaling| should always be >= 0.
@ -111,7 +111,7 @@ int Normal::Process(const int16_t* input,
}

// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
int increment = static_cast<int>(64 / fs_mult);
int increment = 64 / fs_mult;
for (size_t i = 0; i < length_per_channel; i++) {
// Scale with mute factor.
assert(channel_ix < output->Channels());
@ -131,7 +131,7 @@ int Normal::Process(const int16_t* input,
assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
increment = 4 >> fs_shift;
int fraction = increment;
for (size_t i = 0; i < 8 * fs_mult; i++) {
for (size_t i = 0; i < static_cast<size_t>(8 * fs_mult); i++) {
// TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8
// now for legacy bit-exactness.
assert(channel_ix < output->Channels());
@ -144,7 +144,7 @@ int Normal::Process(const int16_t* input,
}
} else if (last_mode == kModeRfc3389Cng) {
assert(output->Channels() == 1);  // Not adapted for multi-channel yet.
static const int kCngLength = 32;
static const size_t kCngLength = 32;
int16_t cng_output[kCngLength];
// Reset mute factor and start up fresh.
external_mute_factor_array[0] = 16384;
@ -167,7 +167,7 @@ int Normal::Process(const int16_t* input,
assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
int16_t increment = 4 >> fs_shift;
int16_t fraction = increment;
for (size_t i = 0; i < 8 * fs_mult; i++) {
for (size_t i = 0; i < static_cast<size_t>(8 * fs_mult); i++) {
// TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8 now
// for legacy bit-exactness.
signal[i] =
@ -178,7 +178,7 @@ int Normal::Process(const int16_t* input,
// Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
// still ramping up from previous muting.
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
int increment = static_cast<int>(64 / fs_mult);
int increment = 64 / fs_mult;
size_t length_per_channel = length / output->Channels();
for (size_t i = 0; i < length_per_channel; i++) {
for (size_t channel_ix = 0; channel_ix < output->Channels();

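The fs_shift line in Normal::Process() relies on WebRtcSpl_NormW32(), which returns the number of left shifts needed to normalize a positive 32-bit value into [2^30, 2^31); 30 minus that count is floor(log2(x)). A portable sketch of the same result without the SPL routine (illustrative only):

#include <cassert>
#include <cstdint>

int FloorLog2(int32_t x) {
  assert(x > 0);
  int shift = 0;
  while ((x >> (shift + 1)) > 0)
    ++shift;
  return shift;  // 1 -> 0, 2 -> 1, 4 -> 2, 6 -> 2 (hence "not exact" for 48 kHz).
}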
@ -181,7 +181,7 @@ const RTPHeader* PacketBuffer::NextRtpHeader() const {
return const_cast<const RTPHeader*>(&(buffer_.front()->header));
}

Packet* PacketBuffer::GetNextPacket(int* discard_count) {
Packet* PacketBuffer::GetNextPacket(size_t* discard_count) {
if (Empty()) {
// Buffer is empty.
return NULL;
@ -194,7 +194,7 @@ Packet* PacketBuffer::GetNextPacket(int* discard_count) {

// Discard other packets with the same timestamp. These are duplicates or
// redundant payloads that should not be used.
int discards = 0;
size_t discards = 0;

while (!Empty() &&
buffer_.front()->header.timestamp == packet->header.timestamp) {
@ -240,15 +240,15 @@ int PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit) {
return DiscardOldPackets(timestamp_limit, 0);
}

int PacketBuffer::NumPacketsInBuffer() const {
return static_cast<int>(buffer_.size());
size_t PacketBuffer::NumPacketsInBuffer() const {
return buffer_.size();
}

int PacketBuffer::NumSamplesInBuffer(DecoderDatabase* decoder_database,
int last_decoded_length) const {
size_t PacketBuffer::NumSamplesInBuffer(DecoderDatabase* decoder_database,
size_t last_decoded_length) const {
PacketList::const_iterator it;
int num_samples = 0;
int last_duration = last_decoded_length;
size_t num_samples = 0;
size_t last_duration = last_decoded_length;
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
Packet* packet = (*it);
AudioDecoder* decoder =

@ -88,7 +88,7 @@ class PacketBuffer {
// Subsequent packets with the same timestamp as the one extracted will be
// discarded and properly deleted. The number of discarded packets will be
// written to the output variable |discard_count|.
virtual Packet* GetNextPacket(int* discard_count);
virtual Packet* GetNextPacket(size_t* discard_count);

// Discards the first packet in the buffer. The packet is deleted.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
@ -109,12 +109,12 @@ class PacketBuffer {

// Returns the number of packets in the buffer, including duplicates and
// redundant packets.
virtual int NumPacketsInBuffer() const;
virtual size_t NumPacketsInBuffer() const;

// Returns the number of samples in the buffer, including samples carried in
// duplicate and redundant packets.
virtual int NumSamplesInBuffer(DecoderDatabase* decoder_database,
int last_decoded_length) const;
virtual size_t NumSamplesInBuffer(DecoderDatabase* decoder_database,
size_t last_decoded_length) const;

// Increase the waiting time counter for every packet in the buffer by |inc|.
// The default value for |inc| is 1.

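A sketch of how a caller consumes the updated GetNextPacket() contract (hypothetical drain function; PacketBuffer and Packet are the classes above, and the delete pattern mirrors the unit test below):

#include <cstddef>

void DrainOnePacket(webrtc::PacketBuffer* buffer) {
  size_t discard_count = 0;  // Unsigned, like NumPacketsInBuffer().
  webrtc::Packet* packet = buffer->GetNextPacket(&discard_count);
  if (!packet)
    return;  // Buffer was empty.
  // discard_count duplicates with the same timestamp were deleted internally.
  delete[] packet->payload;
  delete packet;
}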
@ -97,7 +97,7 @@ TEST(PacketBuffer, InsertPacket) {
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
EXPECT_EQ(4711u, next_ts);
EXPECT_FALSE(buffer.Empty());
EXPECT_EQ(1, buffer.NumPacketsInBuffer());
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
const RTPHeader* hdr = buffer.NextRtpHeader();
EXPECT_EQ(&(packet->header), hdr);  // Compare pointer addresses.

@ -116,12 +116,12 @@ TEST(PacketBuffer, FlushBuffer) {
Packet* packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
}
EXPECT_EQ(10, buffer.NumPacketsInBuffer());
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
EXPECT_FALSE(buffer.Empty());

buffer.Flush();
// Buffer should delete the payloads itself.
EXPECT_EQ(0, buffer.NumPacketsInBuffer());
EXPECT_EQ(0u, buffer.NumPacketsInBuffer());
EXPECT_TRUE(buffer.Empty());
}

@ -137,7 +137,7 @@ TEST(PacketBuffer, OverfillBuffer) {
Packet* packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
}
EXPECT_EQ(10, buffer.NumPacketsInBuffer());
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
uint32_t next_ts;
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
EXPECT_EQ(0u, next_ts);  // Expect first inserted packet to be first in line.
@ -145,7 +145,7 @@ TEST(PacketBuffer, OverfillBuffer) {
// Insert 11th packet; should flush the buffer and insert it after flushing.
Packet* packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacket(packet));
EXPECT_EQ(1, buffer.NumPacketsInBuffer());
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
// Expect last inserted packet to be first in line.
EXPECT_EQ(packet->header.timestamp, next_ts);
@ -179,7 +179,7 @@ TEST(PacketBuffer, InsertPacketList) {
&current_pt,
&current_cng_pt));
EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
EXPECT_EQ(10, buffer.NumPacketsInBuffer());
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
EXPECT_EQ(0, current_pt);  // Current payload type changed to 0.
EXPECT_EQ(0xFF, current_cng_pt);  // CNG payload type not changed.

@ -220,7 +220,7 @@ TEST(PacketBuffer, InsertPacketListChangePayloadType) {
&current_pt,
&current_cng_pt));
EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
EXPECT_EQ(1, buffer.NumPacketsInBuffer());  // Only the last packet.
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());  // Only the last packet.
EXPECT_EQ(1, current_pt);  // Current payload type changed to 1.
EXPECT_EQ(0xFF, current_cng_pt);  // CNG payload type not changed.

@ -256,7 +256,7 @@ TEST(PacketBuffer, ExtractOrderRedundancy) {
{0x0006, 0x0000001E, 1, false, -1},
};

const int kExpectPacketsInBuffer = 9;
const size_t kExpectPacketsInBuffer = 9;

std::vector<Packet*> expect_order(kExpectPacketsInBuffer);

@ -277,10 +277,10 @@ TEST(PacketBuffer, ExtractOrderRedundancy) {

EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer());

int drop_count;
for (int i = 0; i < kExpectPacketsInBuffer; ++i) {
size_t drop_count;
for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) {
Packet* packet = buffer.GetNextPacket(&drop_count);
EXPECT_EQ(0, drop_count);
EXPECT_EQ(0u, drop_count);
EXPECT_EQ(packet, expect_order[i]);  // Compare pointer addresses.
delete[] packet->payload;
delete packet;
@ -302,7 +302,7 @@ TEST(PacketBuffer, DiscardPackets) {
Packet* packet = gen.NextPacket(payload_len);
buffer.InsertPacket(packet);
}
EXPECT_EQ(10, buffer.NumPacketsInBuffer());
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

// Discard them one by one and make sure that the right packets are at the
// front of the buffer.
@ -350,7 +350,7 @@ TEST(PacketBuffer, Reordering) {
decoder_database,
&current_pt,
&current_cng_pt));
EXPECT_EQ(10, buffer.NumPacketsInBuffer());
EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

// Extract them and make sure that they come out in the right order.
uint32_t current_ts = start_ts;
@ -425,7 +425,7 @@ TEST(PacketBuffer, Failures) {
&current_pt,
&current_cng_pt));
EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
EXPECT_EQ(1, buffer->NumPacketsInBuffer());
EXPECT_EQ(1u, buffer->NumPacketsInBuffer());
delete buffer;
EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}

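The `u` suffixes added throughout these tests are not cosmetic: EXPECT_EQ compares both operands with their deduced types, so once NumPacketsInBuffer() returns size_t a signed literal would trip sign-compare warnings. A minimal sketch of the same point outside gtest:

#include <cstddef>

bool CountIsTen(size_t num_packets) {
  return num_packets == 10u;  // 10 (int) would mix signedness; 10u matches.
}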
@ -45,7 +45,7 @@ void PostDecodeVad::Init() {
}
}

void PostDecodeVad::Update(int16_t* signal, int length,
void PostDecodeVad::Update(int16_t* signal, size_t length,
AudioDecoder::SpeechType speech_type,
bool sid_frame,
int fs_hz) {
@ -68,12 +68,13 @@ void PostDecodeVad::Update(int16_t* signal, int length,
}

if (length > 0 && running_) {
int vad_sample_index = 0;
size_t vad_sample_index = 0;
active_speech_ = false;
// Loop through frame sizes 30, 20, and 10 ms.
for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
vad_frame_size_ms -= 10) {
int vad_frame_size_samples = vad_frame_size_ms * fs_hz / 1000;
size_t vad_frame_size_samples =
static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
while (length - vad_sample_index >= vad_frame_size_samples) {
int vad_return = WebRtcVad_Process(
vad_instance_, fs_hz, &signal[vad_sample_index],

@ -46,7 +46,7 @@ class PostDecodeVad {

// Updates post-decode VAD with the audio data in |signal| having |length|
// samples. The data is of type |speech_type|, at the sample rate |fs_hz|.
void Update(int16_t* signal, int length,
void Update(int16_t* signal, size_t length,
AudioDecoder::SpeechType speech_type, bool sid_frame, int fs_hz);

// Accessors.

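Update() consumes the decoded signal in the largest VAD frame that still fits, stepping down from 30 to 10 ms. A standalone sketch of that partitioning loop (ProcessFrame is a hypothetical stand-in for the WebRtcVad_Process call):

#include <cstddef>

void PartitionForVad(size_t length, int fs_hz,
                     void (*ProcessFrame)(size_t index, size_t frame_samples)) {
  size_t index = 0;
  for (int frame_ms = 30; frame_ms >= 10; frame_ms -= 10) {
    const size_t frame_samples = static_cast<size_t>(frame_ms * fs_hz / 1000);
    while (length - index >= frame_samples) {  // index <= length holds here.
      ProcessFrame(index, frame_samples);
      index += frame_samples;
    }
  }
}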
@ -18,14 +18,14 @@ namespace webrtc {

PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
const int16_t* input,
int input_length,
int old_data_length,
size_t input_length,
size_t old_data_length,
AudioMultiVector* output,
int16_t* length_change_samples) {
size_t* length_change_samples) {
old_data_length_per_channel_ = old_data_length;
// Input length must be (almost) 30 ms.
// Also, the new part must be at least |overlap_samples_| elements.
static const int k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
if (num_channels_ == 0 ||
input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
old_data_length >= input_length / num_channels_ - overlap_samples_) {
@ -41,7 +41,7 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(

void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
int16_t* best_correlation,
int* peak_index) const {
size_t* peak_index) const {
// When the signal does not contain any active speech, the correlation does
// not matter. Simply set it to zero.
*best_correlation = 0;
@ -51,7 +51,7 @@ void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
// the new data.
// but we must ensure that best_correlation is not larger than the new data.
*peak_index = std::min(*peak_index,
static_cast<int>(len - old_data_length_per_channel_));
len - old_data_length_per_channel_);
}

PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
@ -64,8 +64,7 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
AudioMultiVector* output) const {
// Pre-calculate common multiplication with |fs_mult_|.
// 120 corresponds to 15 ms.
int fs_mult_120 = fs_mult_ * 120;
assert(old_data_length_per_channel_ >= 0);  // Make sure it's been set.
size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120);
// Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
// or passive speech.
if (((best_correlation > kCorrelationThreshold) &&
@ -107,7 +106,7 @@ PreemptiveExpand* PreemptiveExpandFactory::Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
int overlap_samples) const {
size_t overlap_samples) const {
return new PreemptiveExpand(
sample_rate_hz, num_channels, background_noise, overlap_samples);
}

@ -32,9 +32,9 @@ class PreemptiveExpand : public TimeStretch {
PreemptiveExpand(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
int overlap_samples)
size_t overlap_samples)
: TimeStretch(sample_rate_hz, num_channels, background_noise),
old_data_length_per_channel_(-1),
old_data_length_per_channel_(0),
overlap_samples_(overlap_samples) {
}

@ -44,17 +44,17 @@ class PreemptiveExpand : public TimeStretch {
// is provided in the output |length_change_samples|. The method returns
// the outcome of the operation as an enumerator value.
ReturnCodes Process(const int16_t *pw16_decoded,
int len,
int old_data_len,
size_t len,
size_t old_data_len,
AudioMultiVector* output,
int16_t* length_change_samples);
size_t* length_change_samples);

protected:
// Sets the parameters |best_correlation| and |peak_index| to suitable
// values when the signal contains no active speech.
void SetParametersForPassiveSpeech(size_t input_length,
int16_t* best_correlation,
int* peak_index) const override;
size_t* peak_index) const override;

// Checks the criteria for performing the time-stretching operation and,
// if possible, performs the time-stretching.
@ -67,8 +67,8 @@ class PreemptiveExpand : public TimeStretch {
AudioMultiVector* output) const override;

private:
int old_data_length_per_channel_;
int overlap_samples_;
size_t old_data_length_per_channel_;
size_t overlap_samples_;

DISALLOW_COPY_AND_ASSIGN(PreemptiveExpand);
};
@ -81,7 +81,7 @@ struct PreemptiveExpandFactory {
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
int overlap_samples) const;
size_t overlap_samples) const;
};

} // namespace webrtc

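One subtlety of the size_t version of SetParametersForPassiveSpeech(): with unsigned operands, len - old_data_length_per_channel_ would wrap around if the old data were longer than len. That cannot happen here because Process() returns early when old_data_length is too large, but a defensive variant would check first (hypothetical helper, not from the patch):

#include <algorithm>
#include <cstddef>

size_t ClampPeakIndex(size_t peak_index, size_t len, size_t old_len) {
  if (old_len >= len)
    return 0;  // No new data to correlate against.
  return std::min(peak_index, len - old_len);  // Safe: len - old_len > 0.
}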
@ -21,7 +21,7 @@ namespace webrtc {
// This class generates pseudo-random samples.
class RandomVector {
public:
static const int kRandomTableSize = 256;
static const size_t kRandomTableSize = 256;
static const int16_t kRandomTable[kRandomTableSize];

RandomVector()

@ -14,6 +14,7 @@
#include <string.h>  // memset

#include "webrtc/base/checks.h"
#include "webrtc/base/safe_conversions.h"
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include "webrtc/system_wrappers/interface/metrics.h"
@ -140,36 +141,37 @@ void StatisticsCalculator::ResetWaitingTimeStatistics() {
next_waiting_time_index_ = 0;
}

void StatisticsCalculator::ExpandedVoiceSamples(int num_samples) {
void StatisticsCalculator::ExpandedVoiceSamples(size_t num_samples) {
expanded_speech_samples_ += num_samples;
}

void StatisticsCalculator::ExpandedNoiseSamples(int num_samples) {
void StatisticsCalculator::ExpandedNoiseSamples(size_t num_samples) {
expanded_noise_samples_ += num_samples;
}

void StatisticsCalculator::PreemptiveExpandedSamples(int num_samples) {
void StatisticsCalculator::PreemptiveExpandedSamples(size_t num_samples) {
preemptive_samples_ += num_samples;
}

void StatisticsCalculator::AcceleratedSamples(int num_samples) {
void StatisticsCalculator::AcceleratedSamples(size_t num_samples) {
accelerate_samples_ += num_samples;
}

void StatisticsCalculator::AddZeros(int num_samples) {
void StatisticsCalculator::AddZeros(size_t num_samples) {
added_zero_samples_ += num_samples;
}

void StatisticsCalculator::PacketsDiscarded(int num_packets) {
void StatisticsCalculator::PacketsDiscarded(size_t num_packets) {
discarded_packets_ += num_packets;
}

void StatisticsCalculator::LostSamples(int num_samples) {
void StatisticsCalculator::LostSamples(size_t num_samples) {
lost_timestamps_ += num_samples;
}

void StatisticsCalculator::IncreaseCounter(int num_samples, int fs_hz) {
const int time_step_ms = rtc::CheckedDivExact(1000 * num_samples, fs_hz);
void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) {
const int time_step_ms =
rtc::CheckedDivExact(static_cast<int>(1000 * num_samples), fs_hz);
delayed_packet_outage_counter_.AdvanceClock(time_step_ms);
excess_buffer_delay_.AdvanceClock(time_step_ms);
timestamps_since_last_report_ += static_cast<uint32_t>(num_samples);
@ -207,8 +209,8 @@ void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) {

void StatisticsCalculator::GetNetworkStatistics(
int fs_hz,
int num_samples_in_buffers,
int samples_per_packet,
size_t num_samples_in_buffers,
size_t samples_per_packet,
const DelayManager& delay_manager,
const DecisionLogic& decision_logic,
NetEqNetworkStatistics *stats) {
@ -220,8 +222,8 @@ void StatisticsCalculator::GetNetworkStatistics(
stats->added_zero_samples = added_zero_samples_;
stats->current_buffer_size_ms =
static_cast<uint16_t>(num_samples_in_buffers * 1000 / fs_hz);
const int ms_per_packet = decision_logic.packet_length_samples() /
(fs_hz / 1000);
const int ms_per_packet = rtc::checked_cast<int>(
decision_logic.packet_length_samples() / (fs_hz / 1000));
stats->preferred_buffer_size_ms = (delay_manager.TargetLevel() >> 8) *
ms_per_packet;
stats->jitter_peaks_found = delay_manager.PeakFound();
@ -230,7 +232,7 @@ void StatisticsCalculator::GetNetworkStatistics(
stats->packet_loss_rate =
CalculateQ14Ratio(lost_timestamps_, timestamps_since_last_report_);

const unsigned discarded_samples = discarded_packets_ * samples_per_packet;
const size_t discarded_samples = discarded_packets_ * samples_per_packet;
stats->packet_discard_rate =
CalculateQ14Ratio(discarded_samples, timestamps_since_last_report_);

@ -265,7 +267,7 @@ void StatisticsCalculator::WaitingTimes(std::vector<int>* waiting_times) {
ResetWaitingTimeStatistics();
}

uint16_t StatisticsCalculator::CalculateQ14Ratio(uint32_t numerator,
uint16_t StatisticsCalculator::CalculateQ14Ratio(size_t numerator,
uint32_t denominator) {
if (numerator == 0) {
return 0;

@ -42,32 +42,32 @@ class StatisticsCalculator {

// Reports that |num_samples| samples were produced through expansion, and
// that the expansion produced other than just noise samples.
void ExpandedVoiceSamples(int num_samples);
void ExpandedVoiceSamples(size_t num_samples);

// Reports that |num_samples| samples were produced through expansion, and
// that the expansion produced only noise samples.
void ExpandedNoiseSamples(int num_samples);
void ExpandedNoiseSamples(size_t num_samples);

// Reports that |num_samples| samples were produced through preemptive
// expansion.
void PreemptiveExpandedSamples(int num_samples);
void PreemptiveExpandedSamples(size_t num_samples);

// Reports that |num_samples| samples were removed through accelerate.
void AcceleratedSamples(int num_samples);
void AcceleratedSamples(size_t num_samples);

// Reports that |num_samples| zeros were inserted into the output.
void AddZeros(int num_samples);
void AddZeros(size_t num_samples);

// Reports that |num_packets| packets were discarded.
void PacketsDiscarded(int num_packets);
void PacketsDiscarded(size_t num_packets);

// Reports that |num_samples| were lost.
void LostSamples(int num_samples);
void LostSamples(size_t num_samples);

// Increases the report interval counter with |num_samples| at a sample rate
// of |fs_hz|. This is how the StatisticsCalculator gets notified that current
// time is increasing.
void IncreaseCounter(int num_samples, int fs_hz);
void IncreaseCounter(size_t num_samples, int fs_hz);

// Stores new packet waiting time in waiting time statistics.
void StoreWaitingTime(int waiting_time_ms);
@ -85,8 +85,8 @@ class StatisticsCalculator {
// yet to play out is |num_samples_in_buffers|, and the number of samples per
// packet is |samples_per_packet|.
void GetNetworkStatistics(int fs_hz,
int num_samples_in_buffers,
int samples_per_packet,
size_t num_samples_in_buffers,
size_t samples_per_packet,
const DelayManager& delay_manager,
const DecisionLogic& decision_logic,
NetEqNetworkStatistics *stats);
@ -150,15 +150,15 @@ class StatisticsCalculator {
};

// Calculates numerator / denominator, and returns the value in Q14.
static uint16_t CalculateQ14Ratio(uint32_t numerator, uint32_t denominator);
static uint16_t CalculateQ14Ratio(size_t numerator, uint32_t denominator);

uint32_t preemptive_samples_;
uint32_t accelerate_samples_;
int added_zero_samples_;
uint32_t expanded_speech_samples_;
uint32_t expanded_noise_samples_;
int discarded_packets_;
uint32_t lost_timestamps_;
size_t preemptive_samples_;
size_t accelerate_samples_;
size_t added_zero_samples_;
size_t expanded_speech_samples_;
size_t expanded_noise_samples_;
size_t discarded_packets_;
size_t lost_timestamps_;
uint32_t timestamps_since_last_report_;
int waiting_times_[kLenWaitingTimes];  // Used as a circular buffer.
int len_waiting_times_;

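CalculateQ14Ratio() (its body lies outside this hunk) reports rates in Q14 fixed point, where 16384 represents 1.0, so the ratio is numerator * 2^14 / denominator. A plausible standalone sketch of that computation, with a 64-bit intermediate to avoid overflow (assumed semantics, not the verbatim implementation):

#include <cstddef>
#include <cstdint>

uint16_t Q14Ratio(size_t numerator, uint32_t denominator) {
  if (numerator == 0 || denominator == 0)
    return 0;
  const uint64_t q14 = (static_cast<uint64_t>(numerator) << 14) / denominator;
  return static_cast<uint16_t>(q14 < (1u << 14) ? q14 : (1u << 14));  // Cap at 1.0.
}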
@ -23,6 +23,8 @@

#include <assert.h>

#include <algorithm>

#include "webrtc/typedefs.h"
// needed for NetEqDecoder
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
@ -76,27 +78,27 @@
void NetEQTest_GetCodec_and_PT(char* name,
webrtc::NetEqDecoder* codec,
int* PT,
int frameLen,
size_t frameLen,
int* fs,
int* bitrate,
int* useRed);
int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
int enc_frameSize,
size_t enc_frameSize,
int bitrate,
int sampfreq,
int vad,
int numChannels);
size_t numChannels);
void defineCodecs(webrtc::NetEqDecoder* usedCodec, int* noOfCodecs);
int NetEQTest_free_coders(webrtc::NetEqDecoder coder, int numChannels);
int NetEQTest_encode(int coder,
int16_t* indata,
int frameLen,
unsigned char* encoded,
int sampleRate,
int* vad,
int useVAD,
int bitrate,
int numChannels);
int NetEQTest_free_coders(webrtc::NetEqDecoder coder, size_t numChannels);
size_t NetEQTest_encode(int coder,
int16_t* indata,
size_t frameLen,
unsigned char* encoded,
int sampleRate,
int* vad,
int useVAD,
int bitrate,
size_t numChannels);
void makeRTPheader(unsigned char* rtp_data,
int payloadType,
int seqNo,
@ -109,13 +111,13 @@ int makeRedundantHeader(unsigned char* rtp_data,
uint16_t* blockLen,
int seqNo,
uint32_t ssrc);
int makeDTMFpayload(unsigned char* payload_data,
int Event,
int End,
int Volume,
int Duration);
void stereoDeInterleave(int16_t* audioSamples, int numSamples);
void stereoInterleave(unsigned char* data, int dataLen, int stride);
size_t makeDTMFpayload(unsigned char* payload_data,
int Event,
int End,
int Volume,
int Duration);
void stereoDeInterleave(int16_t* audioSamples, size_t numSamples);
void stereoInterleave(unsigned char* data, size_t dataLen, size_t stride);

/*********************/
/* Codec definitions */
@ -264,13 +266,14 @@ SPEEX_encinst_t* SPEEX16enc_inst[2];
#endif

int main(int argc, char* argv[]) {
int packet_size, fs;
size_t packet_size;
int fs;
webrtc::NetEqDecoder usedCodec;
int payloadType;
int bitrate = 0;
int useVAD, vad;
int useRed = 0;
int len, enc_len;
size_t len, enc_len;
int16_t org_data[4000];
unsigned char rtp_data[8000];
int16_t seqNo = 0xFFF;
@ -282,14 +285,14 @@ int main(int argc, char* argv[]) {
int red_PT[2] = {0};
uint32_t red_TS[2] = {0};
uint16_t red_len[2] = {0};
int RTPheaderLen = 12;
size_t RTPheaderLen = 12;
uint8_t red_data[8000];
#ifdef INSERT_OLD_PACKETS
uint16_t old_length, old_plen;
int old_enc_len;
size_t old_enc_len;
int first_old_packet = 1;
unsigned char old_rtp_data[8000];
int packet_age = 0;
size_t packet_age = 0;
#endif
#ifdef INSERT_DTMF_PACKETS
int NTone = 1;
@ -298,8 +301,8 @@ int main(int argc, char* argv[]) {
bool dtmfSent = false;
#endif
bool usingStereo = false;
int stereoMode = 0;
int numChannels = 1;
size_t stereoMode = 0;
size_t numChannels = 1;

/* check number of parameters */
if ((argc != 6) && (argc != 7)) {
@ -449,12 +452,13 @@ int main(int argc, char* argv[]) {
FILE* out_file = fopen(argv[2], "wb");
CHECK_NOT_NULL(out_file);
printf("Output file: %s\n\n", argv[2]);
packet_size = atoi(argv[3]);
if (packet_size <= 0) {
printf("Packet size %d must be positive", packet_size);
int packet_size_int = atoi(argv[3]);
if (packet_size_int <= 0) {
printf("Packet size %d must be positive", packet_size_int);
return -1;
}
printf("Packet size: %d\n", packet_size);
printf("Packet size: %d\n", packet_size_int);
packet_size = static_cast<size_t>(packet_size_int);

// check for stereo
if (argv[4][strlen(argv[4]) - 1] == '*') {
@ -653,10 +657,6 @@ int main(int argc, char* argv[]) {
enc_len =
NetEQTest_encode(usedCodec, org_data, packet_size, &rtp_data[12], fs,
&vad, useVAD, bitrate, numChannels);
if (enc_len == -1) {
printf("Error encoding frame\n");
exit(0);
}

if (usingStereo && stereoMode != STEREO_MODE_FRAME && vad == 1) {
// interleave the encoded payload for sample-based codecs (not for CNG)
@ -729,12 +729,12 @@ int main(int argc, char* argv[]) {
return -1;
}
#ifdef RANDOM_DATA
for (int k = 0; k < 12 + enc_len; k++) {
for (size_t k = 0; k < 12 + enc_len; k++) {
rtp_data[k] = rand() + rand();
}
#endif
#ifdef RANDOM_PAYLOAD_DATA
for (int k = 12; k < 12 + enc_len; k++) {
for (size_t k = 12; k < 12 + enc_len; k++) {
rtp_data[k] = rand() + rand();
}
#endif
@ -822,7 +822,7 @@ int main(int argc, char* argv[]) {
void NetEQTest_GetCodec_and_PT(char* name,
webrtc::NetEqDecoder* codec,
int* PT,
int frameLen,
size_t frameLen,
int* fs,
int* bitrate,
int* useRed) {
@ -887,14 +887,14 @@ void NetEQTest_GetCodec_and_PT(char* name,
}

int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
int enc_frameSize,
size_t enc_frameSize,
int bitrate,
int sampfreq,
int vad,
int numChannels) {
size_t numChannels) {
int ok = 0;

for (int k = 0; k < numChannels; k++) {
for (size_t k = 0; k < numChannels; k++) {
VAD_inst[k] = WebRtcVad_Create();
if (!VAD_inst[k]) {
printf("Error: Couldn't allocate memory for VAD instance\n");
@ -962,7 +962,7 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
WebRtcG729_EncoderInit(G729enc_inst[k], vad);
if ((vad == 1) && (enc_frameSize != 80)) {
printf("\nError - This simulation only supports VAD for G729 at "
"10ms packets (not %dms)\n", (enc_frameSize >> 3));
"10ms packets (not %" PRIuS "ms)\n", (enc_frameSize >> 3));
}
} else {
printf("\nError - g729 is only developed for 8kHz \n");
@ -1018,7 +1018,7 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
}
if ((vad == 1) && (enc_frameSize != 160)) {
printf("\nError - This simulation only supports VAD for Speex at "
"20ms packets (not %dms)\n",
"20ms packets (not %" PRIuS "ms)\n",
(enc_frameSize >> 3));
vad = 0;
}
@ -1049,7 +1049,7 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
}
if ((vad == 1) && (enc_frameSize != 320)) {
printf("\nError - This simulation only supports VAD for Speex at "
"20ms packets (not %dms)\n",
"20ms packets (not %" PRIuS "ms)\n",
(enc_frameSize >> 4));
vad = 0;
}
@ -1238,8 +1238,7 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
"instance\n");
exit(0);
}
if (((enc_frameSize / 320) < 0) || ((enc_frameSize / 320) > 3) ||
((enc_frameSize % 320) != 0)) {
if (((enc_frameSize / 320) > 3) || ((enc_frameSize % 320) != 0)) {
printf("\nError - AMRwb must have frameSize of 20, 40 or 60ms\n");
exit(0);
}
@ -1320,7 +1319,8 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
bitrate);
exit(0);
}
WebRtcIsac_Control(ISAC_inst[k], bitrate, enc_frameSize >> 4);
WebRtcIsac_Control(ISAC_inst[k], bitrate,
static_cast<int>(enc_frameSize >> 4));
} else {
printf("\nError - iSAC only supports 480 or 960 enc_frameSize (30 or "
"60 ms)\n");
@ -1379,7 +1379,8 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
"56000 bps (not %i)\n", bitrate);
exit(0);
}
WebRtcIsac_Control(ISACSWB_inst[k], bitrate, enc_frameSize >> 5);
WebRtcIsac_Control(ISACSWB_inst[k], bitrate,
static_cast<int>(enc_frameSize >> 5));
} else {
printf("\nError - iSAC SWB only supports 960 enc_frameSize (30 "
"ms)\n");
@ -1424,8 +1425,8 @@ int NetEQTest_init_coders(webrtc::NetEqDecoder coder,
return (0);
}

int NetEQTest_free_coders(webrtc::NetEqDecoder coder, int numChannels) {
for (int k = 0; k < numChannels; k++) {
int NetEQTest_free_coders(webrtc::NetEqDecoder coder, size_t numChannels) {
for (size_t k = 0; k < numChannels; k++) {
WebRtcVad_Free(VAD_inst[k]);
#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \
defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48))
@ -1552,35 +1553,34 @@ int NetEQTest_free_coders(webrtc::NetEqDecoder coder, int numChannels) {
return (0);
}

int NetEQTest_encode(int coder,
int16_t* indata,
int frameLen,
unsigned char* encoded,
int sampleRate,
int* vad,
int useVAD,
int bitrate,
int numChannels) {
int cdlen = 0;
size_t NetEQTest_encode(int coder,
int16_t* indata,
size_t frameLen,
unsigned char* encoded,
int sampleRate,
int* vad,
int useVAD,
int bitrate,
size_t numChannels) {
size_t cdlen = 0;
int16_t* tempdata;
static int first_cng = 1;
int16_t tempLen;

size_t tempLen;
*vad = 1;

// check VAD first
if (useVAD) {
*vad = 0;

int sampleRate_10 = 10 * sampleRate / 1000;
int sampleRate_20 = 20 * sampleRate / 1000;
int sampleRate_30 = 30 * sampleRate / 1000;
for (int k = 0; k < numChannels; k++) {
size_t sampleRate_10 = static_cast<size_t>(10 * sampleRate / 1000);
size_t sampleRate_20 = static_cast<size_t>(20 * sampleRate / 1000);
size_t sampleRate_30 = static_cast<size_t>(30 * sampleRate / 1000);
for (size_t k = 0; k < numChannels; k++) {
tempLen = frameLen;
tempdata = &indata[k * frameLen];
int localVad = 0;
/* Partition the signal and test each chunk for VAD.
All chunks must be VAD=0 to produce a total VAD=0. */
while (tempLen >= sampleRate_10) {
if ((tempLen % sampleRate_30) == 0) {  // tempLen is multiple of 30ms
localVad |= WebRtcVad_Process(VAD_inst[k], sampleRate, tempdata,
@ -1607,7 +1607,7 @@ int NetEQTest_encode(int coder,
if (!*vad) {
// all channels are silent
cdlen = 0;
for (int k = 0; k < numChannels; k++) {
for (size_t k = 0; k < numChannels; k++) {
WebRtcCng_Encode(CNGenc_inst[k], &indata[k * frameLen],
(frameLen <= 640 ? frameLen : 640) /* max 640 */,
encoded, &tempLen, first_cng);
@ -1621,9 +1621,9 @@ int NetEQTest_encode(int coder,
}

// loop over all channels
int totalLen = 0;
size_t totalLen = 0;

for (int k = 0; k < numChannels; k++) {
for (size_t k = 0; k < numChannels; k++) {
/* Encode with the selected coder type */
if (coder == webrtc::kDecoderPCMu) { /*g711 u-law */
#ifdef CODEC_G711
@ -1652,7 +1652,8 @@ int NetEQTest_encode(int coder,
#endif
#ifdef CODEC_ILBC
else if (coder == webrtc::kDecoderILBC) { /*iLBC */
cdlen = WebRtcIlbcfix_Encode(iLBCenc_inst[k], indata, frameLen, encoded);
cdlen = static_cast<size_t>(std::max(
WebRtcIlbcfix_Encode(iLBCenc_inst[k], indata, frameLen, encoded), 0));
}
#endif
#if (defined(CODEC_ISAC) || \
@ -1660,28 +1661,30 @@ int NetEQTest_encode(int coder,
// NETEQ_ISACFIX_CODEC
else if (coder == webrtc::kDecoderISAC) { /*iSAC */
int noOfCalls = 0;
cdlen = 0;
while (cdlen <= 0) {
int res = 0;
while (res <= 0) {
#ifdef CODEC_ISAC /* floating point */
cdlen =
res =
WebRtcIsac_Encode(ISAC_inst[k], &indata[noOfCalls * 160], encoded);
#else /* fixed point */
cdlen = WebRtcIsacfix_Encode(ISAC_inst[k], &indata[noOfCalls * 160],
encoded);
res = WebRtcIsacfix_Encode(ISAC_inst[k], &indata[noOfCalls * 160],
encoded);
#endif
noOfCalls++;
}
cdlen = static_cast<size_t>(res);
}
#endif
#ifdef CODEC_ISAC_SWB
else if (coder == webrtc::kDecoderISACswb) { /* iSAC SWB */
int noOfCalls = 0;
cdlen = 0;
while (cdlen <= 0) {
cdlen = WebRtcIsac_Encode(ISACSWB_inst[k], &indata[noOfCalls * 320],
encoded);
int res = 0;
while (res <= 0) {
res = WebRtcIsac_Encode(ISACSWB_inst[k], &indata[noOfCalls * 320],
encoded);
noOfCalls++;
}
cdlen = static_cast<size_t>(res);
}
#endif
indata += frameLen;
@ -1757,11 +1760,11 @@ int makeRedundantHeader(unsigned char* rtp_data,
return rtpPointer - rtp_data;  // length of header in bytes
}

int makeDTMFpayload(unsigned char* payload_data,
int Event,
int End,
int Volume,
int Duration) {
size_t makeDTMFpayload(unsigned char* payload_data,
int Event,
int End,
int Volume,
int Duration) {
unsigned char E, R, V;
R = 0;
V = (unsigned char)Volume;
@ -1778,11 +1781,11 @@ int makeDTMFpayload(unsigned char* payload_data,
return (4);
}

void stereoDeInterleave(int16_t* audioSamples, int numSamples) {
void stereoDeInterleave(int16_t* audioSamples, size_t numSamples) {
int16_t* tempVec;
int16_t* readPtr, *writeL, *writeR;

if (numSamples <= 0)
if (numSamples == 0)
return;

tempVec = (int16_t*)malloc(sizeof(int16_t) * numSamples);
@ -1797,7 +1800,7 @@ void stereoDeInterleave(int16_t* audioSamples, int numSamples) {
writeR = &audioSamples[numSamples / 2];
readPtr = tempVec;

for (int k = 0; k < numSamples; k += 2) {
for (size_t k = 0; k < numSamples; k += 2) {
*writeL = *readPtr;
readPtr++;
*writeR = *readPtr;
@ -1809,7 +1812,7 @@ void stereoDeInterleave(int16_t* audioSamples, int numSamples) {
free(tempVec);
}

void stereoInterleave(unsigned char* data, int dataLen, int stride) {
void stereoInterleave(unsigned char* data, size_t dataLen, size_t stride) {
unsigned char* ptrL, *ptrR;
unsigned char temp[10];

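The PRIuS macro introduced in the printf fixes above is WebRTC's portable format specifier for size_t (pulled in via webrtc/typedefs.h); plain C++11 code can use %zu for the same purpose. A small sketch, with enc_frameSize in samples at 8 kHz so that >> 3 yields milliseconds:

#include <cstddef>
#include <cstdio>

void PrintFrameSizeMs(size_t enc_frameSize) {
  std::printf("10ms packets (not %zums)\n", enc_frameSize >> 3);
}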
@ -59,11 +59,11 @@ class NetEqIlbcQualityTest : public NetEqQualityTest {
|
||||
}
|
||||
|
||||
int EncodeBlock(int16_t* in_data,
|
||||
int block_size_samples,
|
||||
size_t block_size_samples,
|
||||
uint8_t* payload,
|
||||
int max_bytes) override {
|
||||
const int kFrameSizeSamples = 80; // Samples per 10 ms.
|
||||
int encoded_samples = 0;
|
||||
size_t max_bytes) override {
|
||||
const size_t kFrameSizeSamples = 80; // Samples per 10 ms.
|
||||
size_t encoded_samples = 0;
|
||||
uint32_t dummy_timestamp = 0;
|
||||
AudioEncoder::EncodedInfo info;
|
||||
do {
|
||||
|
||||
@ -43,8 +43,8 @@ class NetEqIsacQualityTest : public NetEqQualityTest {
|
||||
NetEqIsacQualityTest();
|
||||
void SetUp() override;
|
||||
void TearDown() override;
|
||||
virtual int EncodeBlock(int16_t* in_data, int block_size_samples,
|
||||
uint8_t* payload, int max_bytes);
|
||||
virtual int EncodeBlock(int16_t* in_data, size_t block_size_samples,
|
||||
uint8_t* payload, size_t max_bytes);
|
||||
private:
|
||||
ISACFIX_MainStruct* isac_encoder_;
|
||||
int bit_rate_kbps_;
|
||||
@ -78,8 +78,8 @@ void NetEqIsacQualityTest::TearDown() {
}

int NetEqIsacQualityTest::EncodeBlock(int16_t* in_data,
                                      int block_size_samples,
                                      uint8_t* payload, int max_bytes) {
                                      size_t block_size_samples,
                                      uint8_t* payload, size_t max_bytes) {
  // ISAC takes 10 ms for every call.
  const int subblocks = kIsacBlockDurationMs / 10;
  const int subblock_length = 10 * kIsacInputSamplingKhz;

@ -103,12 +103,12 @@ class NetEqOpusQualityTest : public NetEqQualityTest {
  NetEqOpusQualityTest();
  void SetUp() override;
  void TearDown() override;
  virtual int EncodeBlock(int16_t* in_data, int block_size_samples,
                          uint8_t* payload, int max_bytes);
  virtual int EncodeBlock(int16_t* in_data, size_t block_size_samples,
                          uint8_t* payload, size_t max_bytes);

 private:
  WebRtcOpusEncInst* opus_encoder_;
  OpusRepacketizer* repacketizer_;
  int sub_block_size_samples_;
  size_t sub_block_size_samples_;
  int bit_rate_kbps_;
  bool fec_;
  bool dtx_;
@ -126,7 +126,8 @@ NetEqOpusQualityTest::NetEqOpusQualityTest()
                   kDecoderOpus),
      opus_encoder_(NULL),
      repacketizer_(NULL),
      sub_block_size_samples_(kOpusBlockDurationMs * kOpusSamplingKhz),
      sub_block_size_samples_(
          static_cast<size_t>(kOpusBlockDurationMs * kOpusSamplingKhz)),
      bit_rate_kbps_(FLAGS_bit_rate_kbps),
      fec_(FLAGS_fec),
      dtx_(FLAGS_dtx),
@ -173,8 +174,8 @@ void NetEqOpusQualityTest::TearDown() {
}

int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data,
                                      int block_size_samples,
                                      uint8_t* payload, int max_bytes) {
                                      size_t block_size_samples,
                                      uint8_t* payload, size_t max_bytes) {
  EXPECT_EQ(block_size_samples, sub_block_size_samples_ * sub_packets_);
  int16_t* pointer = in_data;
  int value;
@ -192,7 +193,8 @@ int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data,
    }
    pointer += sub_block_size_samples_ * channels_;
  }
  value = opus_repacketizer_out(repacketizer_, payload, max_bytes);
  value = opus_repacketizer_out(repacketizer_, payload,
                                static_cast<opus_int32>(max_bytes));
  EXPECT_GE(value, 0);
  return value;
}
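EncodeBlock() above encodes several 10 ms sub-blocks and merges them into a single packet with libopus's repacketizer, which is where the opus_int32 cast for max_bytes comes from. A hedged sketch of that API's usage in isolation (frame collection and error handling simplified; not the test's code):

#include <opus/opus.h>

// Merge already-encoded Opus frames into one packet; returns the merged
// length or a negative Opus error code.
opus_int32 MergeOpusFrames(const unsigned char* const* frames,
                           const opus_int32* lengths, int count,
                           unsigned char* out, opus_int32 max_out) {
  OpusRepacketizer* rp = opus_repacketizer_create();
  if (rp == NULL)
    return OPUS_ALLOC_FAIL;
  for (int i = 0; i < count; ++i) {
    // Appending fails for incompatible frames or totals beyond 120 ms.
    if (opus_repacketizer_cat(rp, frames[i], lengths[i]) != OPUS_OK)
      break;
  }
  opus_int32 len = opus_repacketizer_out(rp, out, max_out);
  opus_repacketizer_destroy(rp);
  return len;
}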
@ -59,11 +59,11 @@ class NetEqPcmuQualityTest : public NetEqQualityTest {
  }

  int EncodeBlock(int16_t* in_data,
                  int block_size_samples,
                  size_t block_size_samples,
                  uint8_t* payload,
                  int max_bytes) override {
    const int kFrameSizeSamples = 80;  // Samples per 10 ms.
    int encoded_samples = 0;
                  size_t max_bytes) override {
    const size_t kFrameSizeSamples = 80;  // Samples per 10 ms.
    size_t encoded_samples = 0;
    uint32_t dummy_timestamp = 0;
    AudioEncoder::EncodedInfo info;
    do {

@ -12,6 +12,7 @@

#include <algorithm>  // min, max

#include "webrtc/base/safe_conversions.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
@ -23,9 +24,10 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
                                              size_t input_len,
                                              bool fast_mode,
                                              AudioMultiVector* output,
                                              int16_t* length_change_samples) {
                                              size_t* length_change_samples) {
  // Pre-calculate common multiplication with |fs_mult_|.
  int fs_mult_120 = fs_mult_ * 120;  // Corresponds to 15 ms.
  size_t fs_mult_120 =
      static_cast<size_t>(fs_mult_ * 120);  // Corresponds to 15 ms.

  const int16_t* signal;
  rtc::scoped_ptr<int16_t[]> signal_array;
@ -48,8 +50,7 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
  }

  // Find maximum absolute value of input signal.
  max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal,
                                              static_cast<int>(signal_len));
  max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len);

  // Downsample to 4 kHz sample rate and calculate auto-correlation.
  DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen,
@ -58,13 +59,12 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
  AutoCorrelation();

  // Find the strongest correlation peak.
  static const int kNumPeaks = 1;
  int peak_index;
  static const size_t kNumPeaks = 1;
  size_t peak_index;
  int16_t peak_value;
  DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks,
                           fs_mult_, &peak_index, &peak_value);
  // Assert that |peak_index| stays within boundaries.
  assert(peak_index >= 0);
  assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_);

  // Compensate peak_index for displaced starting position. The displacement
@ -73,13 +73,13 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
  // multiplication by fs_mult_ * 2.
  peak_index += kMinLag * fs_mult_ * 2;
  // Assert that |peak_index| stays within boundaries.
  assert(peak_index >= 20 * fs_mult_);
  assert(peak_index >= static_cast<size_t>(20 * fs_mult_));
  assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_);

  // Calculate scaling to ensure that |peak_index| samples can be square-summed
  // without overflowing.
  int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) -
      WebRtcSpl_NormW32(peak_index);
      WebRtcSpl_NormW32(static_cast<int32_t>(peak_index));
  scaling = std::max(0, scaling);

  // |vec1| starts at 15 ms minus one pitch period.
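NormW32(x) counts the redundant sign bits of x, so 31 - NormW32(x) is the bit position of x's leading one; the scaling expression above therefore comes out to bits(max²) + bits(peak_index) - 31, i.e. exactly the number of bits by which a worst-case sum of peak_index squared samples would exceed the int32 range. A self-contained restatement, hedged as a sketch (NormW32 here is a simplified stand-in for WebRtcSpl_NormW32):

#include <algorithm>
#include <cstdint>

// For a positive value: how far it can be left-shifted while staying
// positive. (The real routine also handles zero and negative inputs.)
int NormW32(int32_t a) {
  if (a <= 0) return 0;  // sketch: positive inputs only
  int zeros = 0;
  while ((static_cast<uint32_t>(a) << zeros) < 0x40000000u)
    ++zeros;
  return zeros;
}

// Per-sample right shift so that |peak_index| squared samples can be
// summed in an int32; same expression as the diff above, in isolation.
int SumSquaresScaling(int16_t max_abs, size_t peak_index) {
  int32_t max_sq = static_cast<int32_t>(max_abs) * max_abs;
  int scaling =
      31 - NormW32(max_sq) - NormW32(static_cast<int32_t>(peak_index));
  return std::max(0, scaling);
}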
@ -177,7 +177,7 @@ void TimeStretch::AutoCorrelation() {
}

bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
                                  int peak_index, int scaling) const {
                                  size_t peak_index, int scaling) const {
  // Check if the signal seems to be active speech or not (simple VAD).
  // If (vec1_energy + vec2_energy) / (2 * peak_index) <=
  // 8 * background_noise_energy, then we say that the signal contains no
@ -197,7 +197,8 @@ bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
  int right_scale = 16 - WebRtcSpl_NormW32(right_side);
  right_scale = std::max(0, right_scale);
  left_side = left_side >> right_scale;
  right_side = peak_index * (right_side >> right_scale);
  right_side =
      rtc::checked_cast<int32_t>(peak_index) * (right_side >> right_scale);

  // Scale |left_side| properly before comparing with |right_side|.
  // (|scaling| is the scale factor before energy calculation, thus the scale
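The comparison in SpeechDetection() avoids a division by cross-multiplying: rather than testing (vec1_energy + vec2_energy) / (2 * peak_index) <= 8 * background_noise_energy directly, both sides are shifted into range and the peak_index factor is moved to the right-hand side. In floating point, where the fixed-point scaling drops out, the decision reduces to the following hedged sketch (names are illustrative):

// True if the block looks passive (non-speech): average energy per sample
// across the two correlation windows is at most 8x the background-noise
// energy estimate.
bool LooksPassive(double vec1_energy, double vec2_energy,
                  double samples_per_window, double noise_energy) {
  double energy_per_sample =
      (vec1_energy + vec2_energy) / (2.0 * samples_per_window);
  return energy_per_sample <= 8.0 * noise_energy;
}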
@ -39,7 +39,7 @@ class TimeStretch {
              const BackgroundNoise& background_noise)
      : sample_rate_hz_(sample_rate_hz),
        fs_mult_(sample_rate_hz / 8000),
        num_channels_(static_cast<int>(num_channels)),
        num_channels_(num_channels),
        master_channel_(0),  // First channel is master.
        background_noise_(background_noise),
        max_input_value_(0) {
@ -48,7 +48,7 @@ class TimeStretch {
           sample_rate_hz_ == 32000 ||
           sample_rate_hz_ == 48000);
    assert(num_channels_ > 0);
    assert(static_cast<int>(master_channel_) < num_channels_);
    assert(master_channel_ < num_channels_);
    memset(auto_correlation_, 0, sizeof(auto_correlation_));
  }
@ -60,7 +60,7 @@ class TimeStretch {
                      size_t input_len,
                      bool fast_mode,
                      AudioMultiVector* output,
                      int16_t* length_change_samples);
                      size_t* length_change_samples);

 protected:
  // Sets the parameters |best_correlation| and |peak_index| to suitable
@ -68,7 +68,7 @@ class TimeStretch {
  // implemented by the sub-classes.
  virtual void SetParametersForPassiveSpeech(size_t input_length,
                                             int16_t* best_correlation,
                                             int* peak_index) const = 0;
                                             size_t* peak_index) const = 0;

  // Checks the criteria for performing the time-stretching operation and,
  // if possible, performs the time-stretching. This method must be implemented
@ -82,16 +82,16 @@ class TimeStretch {
                                  bool fast_mode,
                                  AudioMultiVector* output) const = 0;

  static const int kCorrelationLen = 50;
  static const int kLogCorrelationLen = 6;  // >= log2(kCorrelationLen).
  static const int kMinLag = 10;
  static const int kMaxLag = 60;
  static const int kDownsampledLen = kCorrelationLen + kMaxLag;
  static const size_t kCorrelationLen = 50;
  static const size_t kLogCorrelationLen = 6;  // >= log2(kCorrelationLen).
  static const size_t kMinLag = 10;
  static const size_t kMaxLag = 60;
  static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
  static const int kCorrelationThreshold = 14746;  // 0.9 in Q14.

  const int sample_rate_hz_;
  const int fs_mult_;  // Sample rate multiplier = sample_rate_hz_ / 8000.
  const int num_channels_;
  const size_t num_channels_;
  const size_t master_channel_;
  const BackgroundNoise& background_noise_;
  int16_t max_input_value_;
@ -107,7 +107,7 @@ class TimeStretch {

  // Performs a simple voice-activity detection based on the input parameters.
  bool SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
                       int peak_index, int scaling) const;
                       size_t peak_index, int scaling) const;

  DISALLOW_COPY_AND_ASSIGN(TimeStretch);
};

@ -75,12 +75,12 @@ class TimeStretchTest : public ::testing::Test {

  // Returns the total length change (in samples) that the accelerate operation
  // resulted in during the run.
  int TestAccelerate(int loops, bool fast_mode) {
  size_t TestAccelerate(size_t loops, bool fast_mode) {
    Accelerate accelerate(sample_rate_hz_, kNumChannels, background_noise_);
    int total_length_change = 0;
    for (int i = 0; i < loops; ++i) {
    size_t total_length_change = 0;
    for (size_t i = 0; i < loops; ++i) {
      AudioMultiVector output(kNumChannels);
      int16_t length_change;
      size_t length_change;
      UpdateReturnStats(accelerate.Process(Next30Ms(), block_size_, fast_mode,
                                           &output, &length_change));
      total_length_change += length_change;
@ -110,7 +110,7 @@ class TimeStretchTest : public ::testing::Test {

TEST_F(TimeStretchTest, Accelerate) {
  // TestAccelerate returns the total length change in samples.
  EXPECT_EQ(15268, TestAccelerate(100, false));
  EXPECT_EQ(15268U, TestAccelerate(100, false));
  EXPECT_EQ(9, return_stats_[TimeStretch::kSuccess]);
  EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]);
  EXPECT_EQ(33, return_stats_[TimeStretch::kNoStretch]);
@ -118,7 +118,7 @@ TEST_F(TimeStretchTest, Accelerate) {

TEST_F(TimeStretchTest, AccelerateFastMode) {
  // TestAccelerate returns the total length change in samples.
  EXPECT_EQ(21400, TestAccelerate(100, true));
  EXPECT_EQ(21400U, TestAccelerate(100, true));
  EXPECT_EQ(31, return_stats_[TimeStretch::kSuccess]);
  EXPECT_EQ(58, return_stats_[TimeStretch::kSuccessLowEnergy]);
  EXPECT_EQ(11, return_stats_[TimeStretch::kNoStretch]);

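A recurring detail in these test updates: once TestAccelerate returns size_t, the expected literals grow a U suffix. EXPECT_EQ deduces both argument types, and comparing a signed int literal against an unsigned size_t draws sign-compare warnings (treated as errors in the WebRTC build); the unsigned literal keeps both sides the same signedness. A minimal illustration, hedged as a generic gtest property rather than anything specific to this file (the helper is hypothetical):

#include <cstddef>
#include "gtest/gtest.h"

size_t SampleCount() { return 15268; }  // hypothetical helper

TEST(SignednessExample, MatchLiteralToReturnType) {
  // EXPECT_EQ(15268, SampleCount()) would compare int against size_t and
  // trigger -Wsign-compare; the U suffix avoids the mixed comparison.
  EXPECT_EQ(15268U, SampleCount());
}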
@ -31,8 +31,8 @@ ConstantPcmPacketSource::ConstantPcmPacketSource(size_t payload_len_samples,
      seq_number_(0),
      timestamp_(0),
      payload_ssrc_(0xABCD1234) {
  int encoded_len = WebRtcPcm16b_Encode(&sample_value, 1, encoded_sample_);
  CHECK_EQ(2, encoded_len);
  size_t encoded_len = WebRtcPcm16b_Encode(&sample_value, 1, encoded_sample_);
  CHECK_EQ(2U, encoded_len);
}

Packet* ConstantPcmPacketSource::NextPacket() {

@ -43,11 +43,11 @@ void NetEqExternalDecoderTest::InsertPacket(WebRtcRTPHeader rtp_header,
      rtp_header, payload, payload_size_bytes, receive_timestamp));
}

int NetEqExternalDecoderTest::GetOutputAudio(size_t max_length,
                                             int16_t* output,
                                             NetEqOutputType* output_type) {
size_t NetEqExternalDecoderTest::GetOutputAudio(size_t max_length,
                                                int16_t* output,
                                                NetEqOutputType* output_type) {
  // Get audio from regular instance.
  int samples_per_channel;
  size_t samples_per_channel;
  int num_channels;
  EXPECT_EQ(NetEq::kOK,
            neteq_->GetAudio(max_length,
@ -56,7 +56,8 @@ int NetEqExternalDecoderTest::GetOutputAudio(size_t max_length,
                             &num_channels,
                             output_type));
  EXPECT_EQ(channels_, num_channels);
  EXPECT_EQ(kOutputLengthMs * sample_rate_hz_ / 1000, samples_per_channel);
  EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * sample_rate_hz_ / 1000),
            samples_per_channel);
  return samples_per_channel;
}

@ -42,8 +42,8 @@ class NetEqExternalDecoderTest {

  // Get 10 ms of audio data. The data is written to |output|, which can hold
  // (at least) |max_length| elements. Returns number of samples.
  int GetOutputAudio(size_t max_length, int16_t* output,
                     NetEqOutputType* output_type);
  size_t GetOutputAudio(size_t max_length, int16_t* output,
                        NetEqOutputType* output_type);

  NetEq* neteq() { return neteq_.get(); }

@ -101,19 +101,19 @@ int64_t NetEqPerformanceTest::Run(int runtime_ms,

  // Get output audio, but don't do anything with it.
  static const int kMaxChannels = 1;
  static const int kMaxSamplesPerMs = 48000 / 1000;
  static const size_t kMaxSamplesPerMs = 48000 / 1000;
  static const int kOutputBlockSizeMs = 10;
  static const int kOutDataLen =
  static const size_t kOutDataLen =
      kOutputBlockSizeMs * kMaxSamplesPerMs * kMaxChannels;
  int16_t out_data[kOutDataLen];
  int num_channels;
  int samples_per_channel;
  size_t samples_per_channel;
  int error = neteq->GetAudio(kOutDataLen, out_data, &samples_per_channel,
                              &num_channels, NULL);
  if (error != NetEq::kOK)
    return -1;

  assert(samples_per_channel == kSampRateHz * 10 / 1000);
  assert(samples_per_channel == static_cast<size_t>(kSampRateHz * 10 / 1000));

  time_now_ms += kOutputBlockSizeMs;
  if (time_now_ms >= runtime_ms / 2 && !drift_flipped) {
@ -218,8 +218,9 @@ NetEqQualityTest::NetEqQualityTest(int block_duration_ms,
      block_duration_ms_(block_duration_ms),
      in_sampling_khz_(in_sampling_khz),
      out_sampling_khz_(out_sampling_khz),
      in_size_samples_(in_sampling_khz_ * block_duration_ms_),
      out_size_samples_(out_sampling_khz_ * kOutputSizeMs),
      in_size_samples_(
          static_cast<size_t>(in_sampling_khz_ * block_duration_ms_)),
      out_size_samples_(static_cast<size_t>(out_sampling_khz_ * kOutputSizeMs)),
      payload_size_bytes_(0),
      max_payload_bytes_(0),
      in_file_(new ResampleInputAudioFile(FLAGS_in_filename,
@ -392,7 +393,7 @@ int NetEqQualityTest::Transmit() {

int NetEqQualityTest::DecodeBlock() {
  int channels;
  int samples;
  size_t samples;
  int ret = neteq_->GetAudio(out_size_samples_ * channels_, &out_data_[0],
                             &samples, &channels, NULL);

@ -400,9 +401,9 @@ int NetEqQualityTest::DecodeBlock() {
    return -1;
  } else {
    assert(channels == channels_);
    assert(samples == kOutputSizeMs * out_sampling_khz_);
    assert(samples == static_cast<size_t>(kOutputSizeMs * out_sampling_khz_));
    CHECK(output_->WriteArray(out_data_.get(), samples * channels));
    return samples;
    return static_cast<int>(samples);
  }
}

@ -76,8 +76,8 @@ class NetEqQualityTest : public ::testing::Test {
  // |block_size_samples| (samples per channel),
  // 2. save the bit stream to |payload| of |max_bytes| bytes in size,
  // 3. returns the length of the payload (in bytes),
  virtual int EncodeBlock(int16_t* in_data, int block_size_samples,
                          uint8_t* payload, int max_bytes) = 0;
  virtual int EncodeBlock(int16_t* in_data, size_t block_size_samples,
                          uint8_t* payload, size_t max_bytes) = 0;

  // PacketLost(...) determines whether a packet sent at an indicated time gets
  // lost or not.
@ -111,13 +111,13 @@ class NetEqQualityTest : public ::testing::Test {
  const int out_sampling_khz_;

  // Number of samples per channel in a frame.
  const int in_size_samples_;
  const size_t in_size_samples_;

  // Expected output number of samples per channel in a frame.
  const int out_size_samples_;
  const size_t out_size_samples_;

  size_t payload_size_bytes_;
  int max_payload_bytes_;
  size_t max_payload_bytes_;

  rtc::scoped_ptr<InputAudioFile> in_file_;
  rtc::scoped_ptr<AudioSink> output_;

@ -23,6 +23,7 @@

#include "google/gflags.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/safe_conversions.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
@ -324,7 +325,7 @@ size_t ReplacePayload(webrtc::test::InputAudioFile* replacement_audio_file,
    // Encode it as PCM16.
    assert((*payload).get());
    payload_len = WebRtcPcm16b_Encode((*replacement_audio).get(),
                                      static_cast<int16_t>(*frame_size_samples),
                                      *frame_size_samples,
                                      (*payload).get());
    assert(payload_len == 2 * *frame_size_samples);
    // Change payload type to PCM16.
@ -358,7 +359,7 @@ size_t ReplacePayload(webrtc::test::InputAudioFile* replacement_audio_file,

int main(int argc, char* argv[]) {
  static const int kMaxChannels = 5;
  static const int kMaxSamplesPerMs = 48000 / 1000;
  static const size_t kMaxSamplesPerMs = 48000 / 1000;
  static const int kOutputBlockSizeMs = 10;

  std::string program_name = argv[0];
@ -552,11 +553,11 @@ int main(int argc, char* argv[]) {

    // Check if it is time to get output audio.
    if (time_now_ms >= next_output_time_ms) {
      static const int kOutDataLen =
      static const size_t kOutDataLen =
          kOutputBlockSizeMs * kMaxSamplesPerMs * kMaxChannels;
      int16_t out_data[kOutDataLen];
      int num_channels;
      int samples_per_channel;
      size_t samples_per_channel;
      int error = neteq->GetAudio(kOutDataLen, out_data, &samples_per_channel,
                                  &num_channels, NULL);
      if (error != NetEq::kOK) {
@ -564,7 +565,8 @@ int main(int argc, char* argv[]) {
                  neteq->LastError() << std::endl;
      } else {
        // Calculate sample rate from output size.
        sample_rate_hz = 1000 * samples_per_channel / kOutputBlockSizeMs;
        sample_rate_hz = rtc::checked_cast<int>(
            1000 * samples_per_channel / kOutputBlockSizeMs);
      }

      // Write to file.
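The sample-rate recovery above is plain arithmetic on a 10 ms block: 480 samples per channel in 10 ms gives 1000 * 480 / 10 = 48000 Hz. Since samples_per_channel is now a size_t, the product is unsigned, and rtc::checked_cast<int> converts it back while asserting the value fits. A simplified stand-in for that cast's contract (not WebRTC's actual implementation):

#include <cassert>
#include <cstddef>

// Convert size_t to int, asserting the round trip is lossless.
int CheckedCastToInt(size_t value) {
  int result = static_cast<int>(value);
  assert(result >= 0 && static_cast<size_t>(result) == value);
  return result;
}

// Usage, mirroring the line above:
//   sample_rate_hz = CheckedCastToInt(1000 * samples_per_channel / 10);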
@ -26,14 +26,11 @@ bool ResampleInputAudioFile::Read(size_t samples,
  if (!InputAudioFile::Read(samples_to_read, temp_destination.get()))
    return false;
  resampler_.ResetIfNeeded(file_rate_hz_, output_rate_hz, 1);
  int output_length = 0;
  CHECK_EQ(resampler_.Push(temp_destination.get(),
                           static_cast<int>(samples_to_read),
                           destination,
                           static_cast<int>(samples),
                           output_length),
  size_t output_length = 0;
  CHECK_EQ(resampler_.Push(temp_destination.get(), samples_to_read, destination,
                           samples, output_length),
           0);
  CHECK_EQ(static_cast<int>(samples), output_length);
  CHECK_EQ(samples, output_length);
  return true;
}
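Read() here hides a rate conversion: it pulls samples_to_read frames at the file's native rate and resamples them so the caller gets exactly |samples| frames at output_rate_hz, so samples_to_read must be scaled by the rate ratio. A hedged sketch of that bookkeeping (helper name and rounding choice are illustrative, not the class's actual code):

#include <cstddef>

// How many source-rate samples must be read so that resampling yields
// |output_samples| at the target rate; rounds to nearest.
size_t SamplesToRead(size_t output_samples, int file_rate_hz,
                     int output_rate_hz) {
  return (output_samples * static_cast<size_t>(file_rate_hz) +
          static_cast<size_t>(output_rate_hz) / 2) /
         static_cast<size_t>(output_rate_hz);
}

// Example: 441 output samples at 44100 Hz from a 32000 Hz file
// -> read 320 samples before pushing them through the resampler.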