Remove AudioClassifier
BUG=webrtc:5676 Review-Url: https://codereview.webrtc.org/2615983002 Cr-Commit-Position: refs/heads/master@{#15933}
This commit is contained in:
@ -936,14 +936,6 @@ rtc_static_library("audio_network_adaptor") {
|
||||
}
|
||||
}
|
||||
|
||||
# Config that puts the Opus "celt" and "src" source directories on the include
# path of every target it is applied to.
config("neteq_config") {
|
||||
include_dirs = [
|
||||
# Need Opus header files for the audio classifier.
|
||||
"//third_party/opus/src/celt",
|
||||
"//third_party/opus/src/src",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_static_library("neteq") {
|
||||
# TODO(kjellander): Remove (bugs.webrtc.org/6828)
|
||||
# Cyclic dependency with :audio_coding if enabled.
|
||||
@ -952,8 +944,6 @@ rtc_static_library("neteq") {
|
||||
sources = [
|
||||
"neteq/accelerate.cc",
|
||||
"neteq/accelerate.h",
|
||||
"neteq/audio_classifier.cc",
|
||||
"neteq/audio_classifier.h",
|
||||
"neteq/audio_decoder_impl.cc",
|
||||
"neteq/audio_decoder_impl.h",
|
||||
"neteq/audio_multi_vector.cc",
|
||||
@ -1025,8 +1015,6 @@ rtc_static_library("neteq") {
|
||||
"neteq/timestamp_scaler.h",
|
||||
]
|
||||
|
||||
public_configs = [ ":neteq_config" ]
|
||||
|
||||
deps = [
|
||||
":audio_decoder_factory_interface",
|
||||
":audio_decoder_interface",
|
||||
@ -1109,7 +1097,6 @@ if (rtc_include_tests) {
|
||||
":RTPtimeshift",
|
||||
":acm_receive_test",
|
||||
":acm_send_test",
|
||||
":audio_classifier_test",
|
||||
":audio_codec_speed_tests",
|
||||
":audio_decoder_unittests",
|
||||
":audio_decoder_unittests",
|
||||
@ -1717,18 +1704,6 @@ if (rtc_include_tests) {
|
||||
]
|
||||
}
|
||||
|
||||
# Test-only executable built from neteq/test/audio_classifier_test.cc. It
# depends on the :neteq and :webrtc_opus targets of this file and on the
# system_wrappers_default target.
rtc_executable("audio_classifier_test") {
|
||||
testonly = true
|
||||
sources = [
|
||||
"neteq/test/audio_classifier_test.cc",
|
||||
]
|
||||
deps = [
|
||||
":neteq",
|
||||
":webrtc_opus",
|
||||
"../../system_wrappers:system_wrappers_default",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_executable("neteq_ilbc_quality_test") {
|
||||
testonly = true
|
||||
|
||||
|
||||
@ -1,75 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_classifier.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {

// Fixed operating point of the classifier: 48 kHz input handled at 50 frames
// per second, which gives 960 samples per channel and frame.
constexpr int kDefaultSampleRateHz = 48000;
constexpr int kDefaultFrameRateHz = 50;
constexpr int kDefaultFrameSizeSamples =
    kDefaultSampleRateHz / kDefaultFrameRateHz;

// A frame counts as music when its music probability exceeds this value.
constexpr float kDefaultThreshold = 0.5f;

}  // namespace
|
||||
|
||||
// Creates a classifier whose initial decision is "not music" with a music
// probability of zero. |analysis_info_| and |analysis_state_| are
// value-initialized.
AudioClassifier::AudioClassifier()
    : analysis_info_(),
      is_music_(false),
      music_probability_(0),
      // This actually assigns the pointer to a static constant struct rather
      // than creating a struct, so |celt_mode_| does not need to be deleted.
      // The error out-parameter is optional and intentionally not requested.
      celt_mode_(opus_custom_mode_create(kDefaultSampleRateHz,
                                         kDefaultFrameSizeSamples,
                                         nullptr)),
      analysis_state_() {
  // Mode creation for the fixed 48 kHz / 960-sample setup must succeed.
  assert(celt_mode_);
}
|
||||
|
||||
// Nothing to release here: no member is deleted or freed on destruction.
AudioClassifier::~AudioClassifier() = default;
|
||||
|
||||
bool AudioClassifier::Analysis(const int16_t* input,
|
||||
int input_length,
|
||||
int channels) {
|
||||
// Must be 20 ms frames at 48 kHz sampling.
|
||||
assert((input_length / channels) == kDefaultFrameSizeSamples);
|
||||
|
||||
// Only mono or stereo are allowed.
|
||||
assert(channels == 1 || channels == 2);
|
||||
|
||||
// Call Opus' classifier, defined in
|
||||
// "third_party/opus/src/src/analysis.h", with lsb_depth = 16.
|
||||
// Also uses a down-mixing function downmix_int, defined in
|
||||
// "third_party/opus/src/src/opus_private.h", with
|
||||
// constants c1 = 0, and c2 = -2.
|
||||
run_analysis(&analysis_state_,
|
||||
celt_mode_,
|
||||
input,
|
||||
kDefaultFrameSizeSamples,
|
||||
kDefaultFrameSizeSamples,
|
||||
0,
|
||||
-2,
|
||||
channels,
|
||||
kDefaultSampleRateHz,
|
||||
16,
|
||||
downmix_int,
|
||||
&analysis_info_);
|
||||
music_probability_ = analysis_info_.music_prob;
|
||||
is_music_ = music_probability_ > kDefaultThreshold;
|
||||
return is_music_;
|
||||
}
|
||||
|
||||
bool AudioClassifier::is_music() const {
|
||||
return is_music_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_
|
||||
|
||||
#include "webrtc/base/ignore_wundef.h"
|
||||
|
||||
extern "C" {
|
||||
RTC_PUSH_IGNORING_WUNDEF()
|
||||
#include "celt.h"
|
||||
RTC_POP_IGNORING_WUNDEF()
|
||||
#include "analysis.h"
|
||||
#include "opus_private.h"
|
||||
}
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class provides a speech/music classification and is a wrapper over the
|
||||
// Opus classifier. It currently only supports 48 kHz mono or stereo with a
|
||||
// frame size of 20 ms.
|
||||
|
||||
class AudioClassifier {
|
||||
public:
|
||||
AudioClassifier();
|
||||
virtual ~AudioClassifier();
|
||||
|
||||
// Classifies one frame of audio data in input,
|
||||
// input_length : must be channels * 960;
|
||||
// channels : must be 1 (mono) or 2 (stereo).
|
||||
bool Analysis(const int16_t* input, int input_length, int channels);
|
||||
|
||||
// Gets the current classification : true = music, false = speech.
|
||||
virtual bool is_music() const;
|
||||
|
||||
// Gets the current music probability.
|
||||
float music_probability() const { return music_probability_; }
|
||||
|
||||
private:
|
||||
AnalysisInfo analysis_info_;
|
||||
bool is_music_;
|
||||
float music_probability_;
|
||||
const CELTMode* celt_mode_;
|
||||
TonalityAnalysisState analysis_state_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_CLASSIFIER_H_
|
||||
@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_classifier.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Number of samples per channel in one test frame.
static constexpr size_t kFrameSize = 960;
|
||||
|
||||
TEST(AudioClassifierTest, AllZeroInput) {
  // One mono frame consisting solely of zero-valued samples.
  int16_t silent_frame[kFrameSize] = {};

  // Feed the same all-zero frame repeatedly so the classifier can move from
  // its default decision to the value expected below.
  AudioClassifier classifier;
  int remaining_frames = 100;
  while (remaining_frames-- > 0) {
    classifier.Analysis(silent_frame, kFrameSize, 1);
  }
  EXPECT_TRUE(classifier.is_music());
}
|
||||
|
||||
void RunAnalysisTest(const std::string& audio_filename,
|
||||
const std::string& data_filename,
|
||||
size_t channels) {
|
||||
AudioClassifier classifier;
|
||||
std::unique_ptr<int16_t[]> in(new int16_t[channels * kFrameSize]);
|
||||
bool is_music_ref;
|
||||
|
||||
FILE* audio_file = fopen(audio_filename.c_str(), "rb");
|
||||
ASSERT_TRUE(audio_file != NULL) << "Failed to open file " << audio_filename
|
||||
<< std::endl;
|
||||
FILE* data_file = fopen(data_filename.c_str(), "rb");
|
||||
ASSERT_TRUE(audio_file != NULL) << "Failed to open file " << audio_filename
|
||||
<< std::endl;
|
||||
while (fread(in.get(), sizeof(int16_t), channels * kFrameSize, audio_file) ==
|
||||
channels * kFrameSize) {
|
||||
bool is_music =
|
||||
classifier.Analysis(in.get(), channels * kFrameSize, channels);
|
||||
EXPECT_EQ(is_music, classifier.is_music());
|
||||
ASSERT_EQ(1u, fread(&is_music_ref, sizeof(is_music_ref), 1, data_file));
|
||||
EXPECT_EQ(is_music_ref, is_music);
|
||||
}
|
||||
fclose(audio_file);
|
||||
fclose(data_file);
|
||||
}
|
||||
|
||||
TEST(AudioClassifierTest, DoAnalysisMono) {
  // ARM builds are compared against their own reference file; every other
  // architecture shares one.
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)
  const char kReferenceName[] = "short_mixed_mono_48_arm";
#else
  const char kReferenceName[] = "short_mixed_mono_48";
#endif  // WEBRTC_ARCH_ARM
  const std::string audio_path =
      test::ResourcePath("short_mixed_mono_48", "pcm");
  const std::string reference_path = test::ResourcePath(kReferenceName, "dat");
  RunAnalysisTest(audio_path, reference_path, 1);
}
|
||||
|
||||
TEST(AudioClassifierTest, DoAnalysisStereo) {
  // Stereo uses a single reference file regardless of architecture.
  const std::string audio_path =
      test::ResourcePath("short_mixed_stereo_48", "pcm");
  const std::string reference_path =
      test::ResourcePath("short_mixed_stereo_48", "dat");
  RunAnalysisTest(audio_path, reference_path, 2);
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -76,7 +76,6 @@ class NetEq {
|
||||
struct Config {
|
||||
Config()
|
||||
: sample_rate_hz(16000),
|
||||
enable_audio_classifier(false),
|
||||
enable_post_decode_vad(false),
|
||||
max_packets_in_buffer(50),
|
||||
// |max_delay_ms| has the same effect as calling SetMaximumDelay().
|
||||
@ -88,7 +87,6 @@ class NetEq {
|
||||
std::string ToString() const;
|
||||
|
||||
int sample_rate_hz; // Initial value. Will change with input data.
|
||||
bool enable_audio_classifier;
|
||||
bool enable_post_decode_vad;
|
||||
size_t max_packets_in_buffer;
|
||||
int max_delay_ms;
|
||||
|
||||
@ -19,8 +19,7 @@ namespace webrtc {
|
||||
|
||||
std::string NetEq::Config::ToString() const {
|
||||
std::stringstream ss;
|
||||
ss << "sample_rate_hz=" << sample_rate_hz << ", enable_audio_classifier="
|
||||
<< (enable_audio_classifier ? "true" : "false")
|
||||
ss << "sample_rate_hz=" << sample_rate_hz
|
||||
<< ", enable_post_decode_vad="
|
||||
<< (enable_post_decode_vad ? "true" : "false")
|
||||
<< ", max_packets_in_buffer=" << max_packets_in_buffer
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_classifier.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 5) {
|
||||
std::cout << "Usage: " << argv[0] <<
|
||||
" channels output_type <input file name> <output file name> "
|
||||
<< std::endl << std::endl;
|
||||
std::cout << "Where channels can be 1 (mono) or 2 (interleaved stereo),";
|
||||
std::cout << " outputs can be 1 (classification (boolean)) or 2";
|
||||
std::cout << " (classification and music probability (float)),"
|
||||
<< std::endl;
|
||||
std::cout << "and the sampling frequency is assumed to be 48 kHz."
|
||||
<< std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int kFrameSizeSamples = 960;
|
||||
int channels = atoi(argv[1]);
|
||||
if (channels < 1 || channels > 2) {
|
||||
std::cout << "Disallowed number of channels " << channels << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int outputs = atoi(argv[2]);
|
||||
if (outputs < 1 || outputs > 2) {
|
||||
std::cout << "Disallowed number of outputs " << outputs << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
const int data_size = channels * kFrameSizeSamples;
|
||||
std::unique_ptr<int16_t[]> in(new int16_t[data_size]);
|
||||
|
||||
std::string input_filename = argv[3];
|
||||
std::string output_filename = argv[4];
|
||||
|
||||
std::cout << "Input file: " << input_filename << std::endl;
|
||||
std::cout << "Output file: " << output_filename << std::endl;
|
||||
|
||||
FILE* in_file = fopen(input_filename.c_str(), "rb");
|
||||
if (!in_file) {
|
||||
std::cout << "Cannot open input file " << input_filename << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
FILE* out_file = fopen(output_filename.c_str(), "wb");
|
||||
if (!out_file) {
|
||||
std::cout << "Cannot open output file " << output_filename << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
webrtc::AudioClassifier classifier;
|
||||
int frame_counter = 0;
|
||||
int music_counter = 0;
|
||||
while (fread(in.get(), sizeof(*in.get()),
|
||||
data_size, in_file) == (size_t) data_size) {
|
||||
bool is_music = classifier.Analysis(in.get(), data_size, channels);
|
||||
if (!fwrite(&is_music, sizeof(is_music), 1, out_file)) {
|
||||
std::cout << "Error writing." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
if (is_music) {
|
||||
music_counter++;
|
||||
}
|
||||
std::cout << "frame " << frame_counter << " decision " << is_music;
|
||||
if (outputs == 2) {
|
||||
float music_prob = classifier.music_probability();
|
||||
if (!fwrite(&music_prob, sizeof(music_prob), 1, out_file)) {
|
||||
std::cout << "Error writing." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
std::cout << " music prob " << music_prob;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
frame_counter++;
|
||||
}
|
||||
std::cout << frame_counter << " frames processed." << std::endl;
|
||||
if (frame_counter > 0) {
|
||||
float music_percentage = music_counter / static_cast<float>(frame_counter);
|
||||
std::cout << music_percentage << " percent music." << std::endl;
|
||||
}
|
||||
|
||||
fclose(in_file);
|
||||
fclose(out_file);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user