Prevent crash in NetEQ when the decoder output overflows.

NetEQ can crash when the decoder produces more output samples than it can handle. A practical case where this happens is when multiple Opus packets are combined. The best solution is to pass the max size to the ACM decode function and let it return a failure if the max size is too small. BUG=4361 R=henrik.lundin@webrtc.org Review URL: https://webrtc-codereview.appspot.com/45619004 Cr-Commit-Position: refs/heads/master@{#8730} git-svn-id: http://webrtc.googlecode.com/svn/trunk@8730 4adac7df-926f-26a2-2b94-8c16560cd09d
2015-03-16 12:30:37 +00:00
parent 4b89aa03bb
commit 7f7d7e3427
19 changed files with 453 additions and 188 deletions
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.cc
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.cc
@ -16,12 +16,40 @@

 namespace webrtc {

-int AudioDecoder::DecodeRedundant(const uint8_t* encoded,
-                                  size_t encoded_len,
-                                  int sample_rate_hz,
-                                  int16_t* decoded,
-                                  SpeechType* speech_type) {
-  return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type);
+// Decodes one payload after checking that its output fits in |decoded|.
+// Returns -1 without decoding when the estimated output is larger than
+// |max_decoded_bytes|; otherwise returns the result of DecodeInternal().
+// A negative PacketDuration() result skips the size check.
+int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len,
+                         int sample_rate_hz, size_t max_decoded_bytes,
+                         int16_t* decoded, SpeechType* speech_type) {
+  int duration = PacketDuration(encoded, encoded_len);
+  // NOTE(review): PacketDuration() is assumed to report samples per channel,
+  // while |decoded| receives interleaved samples from all channels, so the
+  // required size must be scaled by |channels_| -- confirm against the codecs.
+  if (duration >= 0 &&
+      duration * channels_ * sizeof(int16_t) > max_decoded_bytes) {
+    return -1;
+  }
+  return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
+                        speech_type);
+}
+
+// Decodes the redundant part of a payload after checking that its output fits
+// in |decoded|. Returns -1 without decoding when the estimated output is
+// larger than |max_decoded_bytes|; otherwise returns the result of
+// DecodeRedundantInternal(). A negative PacketDurationRedundant() result skips
+// the size check.
+int AudioDecoder::DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
+                                  int sample_rate_hz, size_t max_decoded_bytes,
+                                  int16_t* decoded, SpeechType* speech_type) {
+  int duration = PacketDurationRedundant(encoded, encoded_len);
+  // NOTE(review): the duration is assumed to be in samples per channel, while
+  // |decoded| receives interleaved samples from all channels, so the required
+  // size must be scaled by |channels_| -- confirm against the codecs.
+  if (duration >= 0 &&
+      duration * channels_ * sizeof(int16_t) > max_decoded_bytes) {
+    return -1;
+  }
+  return DecodeRedundantInternal(encoded, encoded_len, sample_rate_hz, decoded,
+                                 speech_type);
+}
+
+int AudioDecoder::DecodeInternal(const uint8_t* encoded, size_t encoded_len,
+                                 int sample_rate_hz, int16_t* decoded,
+                                 SpeechType* speech_type) {
+  return kNotImplemented;  // Base default: decodes nothing, ignores all args.
+}
+
+int AudioDecoder::DecodeRedundantInternal(const uint8_t* encoded,
+                                          size_t encoded_len,
+                                          int sample_rate_hz, int16_t* decoded,
+                                          SpeechType* speech_type) {
+  return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
+                        speech_type);  // Default: same path as a regular payload.
 }

 bool AudioDecoder::HasDecodePlc() const { return false; }
--- a/webrtc/modules/audio_coding/codecs/audio_decoder.h
+++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h
@ -35,22 +35,25 @@ class AudioDecoder {
  virtual ~AudioDecoder() {}

  // Decodes |encoded_len| bytes from |encoded| and writes the result in
-  // |decoded|. The number of samples from all channels produced is in
-  // the return value. If the decoder produced comfort noise, |speech_type|
+  // |decoded|. The maximum bytes allowed to be written into |decoded| is
+  // |max_decoded_bytes|. The number of samples from all channels produced is
+  // in the return value. If the decoder produced comfort noise, |speech_type|
  // is set to kComfortNoise, otherwise it is kSpeech. The desired output
  // sample rate is provided in |sample_rate_hz|, which must be valid for the
  // codec at hand.
  virtual int Decode(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
+                     size_t max_decoded_bytes,
                     int16_t* decoded,
-                     SpeechType* speech_type) = 0;
+                     SpeechType* speech_type);

  // Same as Decode(), but interfaces to the decoder's redundant decode
  // function. The default implementation ends up in the regular
  // DecodeInternal() method.
  virtual int DecodeRedundant(const uint8_t* encoded,
                              size_t encoded_len,
                              int sample_rate_hz,
+                              size_t max_decoded_bytes,
                              int16_t* decoded,
                              SpeechType* speech_type);

@ -99,6 +102,18 @@ class AudioDecoder {
 protected:
  static SpeechType ConvertSpeechType(int16_t type);

+  virtual int DecodeInternal(const uint8_t* encoded,
+                             size_t encoded_len,
+                             int sample_rate_hz,
+                             int16_t* decoded,
+                             SpeechType* speech_type);
+
+  virtual int DecodeRedundantInternal(const uint8_t* encoded,
+                                      size_t encoded_len,
+                                      int sample_rate_hz,
+                                      int16_t* decoded,
+                                      SpeechType* speech_type);
+
  size_t channels_;

 private:
--- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h
+++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h
@ -72,11 +72,6 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
  int Max10MsFramesInAPacket() const override;

  // AudioDecoder methods.
-  int Decode(const uint8_t* encoded,
-             size_t encoded_len,
-             int sample_rate_hz,
-             int16_t* decoded,
-             SpeechType* speech_type) override;
  bool HasDecodePlc() const override;
  int DecodePlc(int num_frames, int16_t* decoded) override;
  int Init() override;
@ -95,6 +90,13 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
                      uint8_t* encoded,
                      EncodedInfo* info) override;

+  // AudioDecoder protected method.
+  int DecodeInternal(const uint8_t* encoded,
+                     size_t encoded_len,
+                     int sample_rate_hz,
+                     int16_t* decoded,
+                     SpeechType* speech_type) override;
+
 private:
  // This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and
  // STREAM_MAXW16_60MS for iSAC fix (60 ms).
--- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h
+++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h
@ -218,11 +218,11 @@ void AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
 }

 template <typename T>
-int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded,
-                                        size_t encoded_len,
-                                        int sample_rate_hz,
-                                        int16_t* decoded,
-                                        SpeechType* speech_type) {
+int AudioEncoderDecoderIsacT<T>::DecodeInternal(const uint8_t* encoded,
+                                                size_t encoded_len,
+                                                int sample_rate_hz,
+                                                int16_t* decoded,
+                                                SpeechType* speech_type) {
  CriticalSectionScoped cs(state_lock_.get());
  CHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000)
      << "Unsupported sample rate " << sample_rate_hz;
@ -232,8 +232,8 @@ int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded,
  }
  int16_t temp_type = 1;  // Default is speech.
  int16_t ret =
-      T::Decode(isac_state_, encoded, static_cast<int16_t>(encoded_len),
-                decoded, &temp_type);
+      T::DecodeInternal(isac_state_, encoded, static_cast<int16_t>(encoded_len),
+                        decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
 }
--- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h
+++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h
@ -36,11 +36,11 @@ struct IsacFix {
  static inline int16_t Create(instance_type** inst) {
    return WebRtcIsacfix_Create(inst);
  }
-  static inline int16_t Decode(instance_type* inst,
-                               const uint8_t* encoded,
-                               int16_t len,
-                               int16_t* decoded,
-                               int16_t* speech_type) {
+  static inline int16_t DecodeInternal(instance_type* inst,
+                                       const uint8_t* encoded,
+                                       int16_t len,
+                                       int16_t* decoded,
+                                       int16_t* speech_type) {
    return WebRtcIsacfix_Decode(inst, encoded, len, decoded, speech_type);  // Forwards all arguments; result is returned unchanged.
  }
  static inline int16_t DecodePlc(instance_type* inst,
--- a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h
+++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h
@ -35,11 +35,11 @@ struct IsacFloat {
  static inline int16_t Create(instance_type** inst) {
    return WebRtcIsac_Create(inst);
  }
-  static inline int16_t Decode(instance_type* inst,
-                               const uint8_t* encoded,
-                               int16_t len,
-                               int16_t* decoded,
-                               int16_t* speech_type) {
+  static inline int16_t DecodeInternal(instance_type* inst,
+                                       const uint8_t* encoded,
+                                       int16_t len,
+                                       int16_t* decoded,
+                                       int16_t* speech_type) {
    return WebRtcIsac_Decode(inst, encoded, len, decoded, speech_type);  // Forwards all arguments; result is returned unchanged.
  }
  static inline int16_t DecodePlc(instance_type* inst,
--- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@ -554,6 +554,54 @@ TEST_P(OpusTest, OpusDurationEstimation) {
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
 }

+// Encodes kPackets 20 ms frames, merges them into a single packet with the
+// Opus repacketizer, and checks that both the duration estimate and the
+// decoder report kOpus20msFrameSamples * kPackets samples for that packet.
+TEST_P(OpusTest, OpusDecodeRepacketized) {
+  const int kPackets = 6;
+
+  PrepareSpeechData(channels_, 20, 20 * kPackets);
+
+  // Create encoder memory.
+  ASSERT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_,
+                                        channels_,
+                                        application_));
+  ASSERT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_,
+                                        channels_));
+
+  // Set bitrate.
+  EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_,
+                                     channels_ == 1 ? 32000 : 64000));
+
+  // Check number of channels for decoder.
+  EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_));
+
+  // Encode & decode.
+  int16_t audio_type;
+  // Output buffer sized for all merged frames across all channels.
+  rtc::scoped_ptr<int16_t[]> output_data_decode(
+      new int16_t[kPackets * kOpus20msFrameSamples * channels_]);
+  OpusRepacketizer* rp = opus_repacketizer_create();
+  // Creation allocates memory and can fail; stop before using a null state.
+  ASSERT_TRUE(rp != NULL);
+
+  for (int idx = 0; idx < kPackets; idx++) {
+    encoded_bytes_ = WebRtcOpus_Encode(opus_encoder_,
+                                       speech_data_.GetNextBlock(),
+                                       kOpus20msFrameSamples, kMaxBytes,
+                                       bitstream_);
+    EXPECT_EQ(OPUS_OK, opus_repacketizer_cat(rp, bitstream_, encoded_bytes_));
+  }
+
+  encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes);
+  // A non-positive value is an error code, not a payload length.
+  EXPECT_GT(encoded_bytes_, 0);
+
+  EXPECT_EQ(kOpus20msFrameSamples * kPackets,
+            WebRtcOpus_DurationEst(opus_decoder_, bitstream_, encoded_bytes_));
+
+  EXPECT_EQ(kOpus20msFrameSamples * kPackets,
+            WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_,
+                              output_data_decode.get(), &audio_type));
+
+  // Free memory.
+  opus_repacketizer_destroy(rp);
+  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
+  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
+}
+
 INSTANTIATE_TEST_CASE_P(VariousMode,
                        OpusTest,
                        Combine(Values(1, 2), Values(0, 1)));