Revert "Add IntelligibilityEnhancer support to audioproc_float"

Revert reason: While rebasing, I unintentionally included a patch that is breaking the bots.

This reverts commit 98c69a0ee785adeb9d95fffeb55cdb6cedbe82c6.

BUG=

Review URL: https://codereview.webrtc.org/1837313002 .

Cr-Commit-Position: refs/heads/master@{#12148}
This commit is contained in:
Alejandro Luebs
2016-03-29 13:05:40 -07:00
parent 98c69a0ee7
commit dd56fa8642
7 changed files with 38 additions and 99 deletions

View File

@ -29,8 +29,8 @@ const int kWindowSizeMs = 16;
const int kChunkSizeMs = 10; // Size provided by APM.
const float kClipFreqKhz = 0.2f;
const float kKbdAlpha = 1.5f;
const double kLambdaBot = -1.0 / (1 << 30); // Extreme values in bisection
const double kLambdaTop = -1e-5 / (1 << 30); // search for lambda.
const float kLambdaBot = -1.0f; // Extreme values in bisection
const float kLambdaTop = -1e-5f; // search for lambda.
const float kVoiceProbabilityThreshold = 0.02f;
// Number of chunks after voice activity which is still considered speech.
const size_t kSpeechOffsetDelay = 80;
@ -162,12 +162,12 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target) {
const float reciprocal_power_target =
1.f / (power_target + std::numeric_limits<float>::epsilon());
double lambda_bot = kLambdaBot;
double lambda_top = kLambdaTop;
float lambda_bot = kLambdaBot;
float lambda_top = kLambdaTop;
float power_ratio = 2.f; // Ratio of achieved power to target power.
int iters = 0;
while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
const double lambda = (lambda_bot + lambda_top) / 2.0;
const float lambda = (lambda_bot + lambda_top) / 2.f;
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
const float power =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
@ -267,7 +267,7 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
return filter_bank;
}
void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
size_t start_freq,
float* sols) {
const float kMinPower = 1e-5f;
@ -284,19 +284,19 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
sols[n] = 1.f;
} else {
const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] +
const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
const double beta0 =
lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n];
const double alpha0 =
lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n];
RTC_DCHECK_LT(alpha0, 0.0);
const float beta0 =
lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
const float alpha0 =
lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
RTC_DCHECK_LT(alpha0, 0.f);
// The quadratic equation should always have real roots, but to guard
// against numerical errors we limit it to a minimum of zero.
sols[n] = std::max(
0.0, (-beta0 - std::sqrt(std::max(
0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) /
(2.0 * alpha0));
0.f, (-beta0 - std::sqrt(std::max(
0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
(2.f * alpha0));
}
}
}

View File

@ -71,7 +71,7 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
// Analytically solves quadratic for optimal gains given |lambda|.
// Negative gains are set to 0. Stores the results in |sols|.
void SolveForGainsGivenLambda(double lambda, size_t start_freq, float* sols);
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
// Returns true if the audio is speech.
bool IsSpeech(const float* audio);

View File

@ -56,6 +56,7 @@ void void_main(int argc, char* argv[]) {
noise_file.num_channels());
while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
FloatS16ToFloat(in.data(), in.size(), in.data());
FloatS16ToFloat(noise.data(), noise.size(), noise.data());
Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
in_buf.channels());
@ -69,6 +70,7 @@ void void_main(int argc, char* argv[]) {
in_file.num_channels());
Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
in.data());
FloatToFloatS16(in.data(), in.size(), in.data());
out_file.WriteSamples(in.data(), in.size());
}
}

View File

@ -177,15 +177,17 @@ std::vector<float> NoiseSuppressionImpl::NoiseEstimate() {
rtc::CritScope cs(crit_);
std::vector<float> noise_estimate;
#if defined(WEBRTC_NS_FLOAT)
const float kNormalizationFactor = 1.f / (1 << 15);
noise_estimate.assign(WebRtcNs_num_freq(), 0.f);
for (auto& suppressor : suppressors_) {
const float* noise = WebRtcNs_noise_estimate(suppressor->state());
for (size_t i = 0; i < noise_estimate.size(); ++i) {
noise_estimate[i] += noise[i] / suppressors_.size();
noise_estimate[i] +=
kNormalizationFactor * noise[i] / suppressors_.size();
}
}
#elif defined(WEBRTC_NS_FIXED)
const float kNormalizationFactor = 1.f / (1 << 9);
const float kNormalizationFactor = 1.f / (1 << 23);
noise_estimate.assign(WebRtcNsx_num_freq(), 0.f);
for (auto& suppressor : suppressors_) {
const uint32_t* noise = WebRtcNsx_noise_estimate(suppressor->state());

View File

@ -42,39 +42,14 @@ ChannelBuffer<float> GetChannelBuffer(const WavFile& file) {
WavFileProcessor::WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
std::unique_ptr<WavReader> in_file,
std::unique_ptr<WavWriter> out_file,
std::unique_ptr<WavReader> reverse_in_file,
std::unique_ptr<WavWriter> reverse_out_file)
std::unique_ptr<WavWriter> out_file)
: ap_(std::move(ap)),
in_buf_(GetChannelBuffer(*in_file)),
out_buf_(GetChannelBuffer(*out_file)),
input_config_(GetStreamConfig(*in_file)),
output_config_(GetStreamConfig(*out_file)),
buffer_reader_(std::move(in_file)),
buffer_writer_(std::move(out_file)) {
if (reverse_in_file) {
const WavFile* reverse_out_config;
if (reverse_out_file) {
reverse_out_config = reverse_out_file.get();
} else {
reverse_out_config = reverse_in_file.get();
}
reverse_in_buf_.reset(
new ChannelBuffer<float>(GetChannelBuffer(*reverse_in_file)));
reverse_out_buf_.reset(
new ChannelBuffer<float>(GetChannelBuffer(*reverse_out_config)));
reverse_input_config_.reset(
new StreamConfig(GetStreamConfig(*reverse_in_file)));
reverse_output_config_.reset(
new StreamConfig(GetStreamConfig(*reverse_out_config)));
reverse_buffer_reader_.reset(
new ChannelBufferWavReader(std::move(reverse_in_file)));
if (reverse_out_file) {
reverse_buffer_writer_.reset(
new ChannelBufferWavWriter(std::move(reverse_out_file)));
}
}
}
buffer_writer_(std::move(out_file)) {}
bool WavFileProcessor::ProcessChunk() {
if (!buffer_reader_.Read(&in_buf_)) {
@ -87,22 +62,6 @@ bool WavFileProcessor::ProcessChunk() {
output_config_, out_buf_.channels()));
}
buffer_writer_.Write(out_buf_);
if (reverse_buffer_reader_) {
if (!reverse_buffer_reader_->Read(reverse_in_buf_.get())) {
return false;
}
{
const auto st = ScopedTimer(mutable_proc_time());
RTC_CHECK_EQ(kNoErr,
ap_->ProcessReverseStream(reverse_in_buf_->channels(),
*reverse_input_config_.get(),
*reverse_output_config_.get(),
reverse_out_buf_->channels()));
}
if (reverse_buffer_writer_) {
reverse_buffer_writer_->Write(*reverse_out_buf_.get());
}
}
return true;
}

View File

@ -86,9 +86,7 @@ class WavFileProcessor final : public AudioFileProcessor {
// Takes ownership of all parameters.
WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
std::unique_ptr<WavReader> in_file,
std::unique_ptr<WavWriter> out_file,
std::unique_ptr<WavReader> reverse_in_file,
std::unique_ptr<WavWriter> reverse_out_file);
std::unique_ptr<WavWriter> out_file);
virtual ~WavFileProcessor() {}
// Processes one chunk from the WAV input and writes to the WAV output.
@ -103,12 +101,6 @@ class WavFileProcessor final : public AudioFileProcessor {
const StreamConfig output_config_;
ChannelBufferWavReader buffer_reader_;
ChannelBufferWavWriter buffer_writer_;
std::unique_ptr<ChannelBuffer<float>> reverse_in_buf_;
std::unique_ptr<ChannelBuffer<float>> reverse_out_buf_;
std::unique_ptr<StreamConfig> reverse_input_config_;
std::unique_ptr<StreamConfig> reverse_output_config_;
std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_;
std::unique_ptr<ChannelBufferWavWriter> reverse_buffer_writer_;
};
// Used to read from an aecdump file and write to a WavWriter.

View File

@ -42,20 +42,10 @@ DEFINE_string(
o,
"out.wav",
"Name of the output file to write the processed capture stream to.");
DEFINE_string(ri, "", "Name of the render input stream file to read from.");
DEFINE_string(
ro,
"out_reverse.wav",
"Name of the output file to write the processed render stream to.");
DEFINE_int32(out_channels, 1, "Number of output channels.");
const bool out_channels_dummy =
google::RegisterFlagValidator(&FLAGS_out_channels, &ValidateOutChannels);
DEFINE_int32(rev_out_channels, 1, "Number of reverse output channels.");
const bool rev_out_channels_dummy =
google::RegisterFlagValidator(&FLAGS_rev_out_channels,
&ValidateOutChannels);
DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz.");
DEFINE_int32(rev_out_sample_rate, 48000, "Reverse output sample rate in Hz.");
DEFINE_string(mic_positions, "",
"Space delimited cartesian coordinates of microphones in meters. "
"The coordinates of each point are contiguous. "
@ -87,7 +77,8 @@ const char kUsage[] =
"an input capture WAV file or protobuf debug dump and writes to an output\n"
"WAV file.\n"
"\n"
"All components are disabled by default.";
"All components are disabled by default. If any bi-directional components\n"
"are enabled, only debug dump files are permitted.";
} // namespace
@ -100,6 +91,15 @@ int main(int argc, char* argv[]) {
"An input file must be specified with either -i or -dump.\n");
return 1;
}
if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) {
fprintf(stderr, "-aec and -ie require a -dump file.\n");
return 1;
}
if (FLAGS_ie) {
fprintf(stderr,
"FIXME(ajm): The intelligibility enhancer output is not dumped.\n");
return 1;
}
test::TraceToStderr trace_to_stderr(true);
Config config;
@ -135,24 +135,8 @@ int main(int argc, char* argv[]) {
if (FLAGS_dump.empty()) {
auto in_file = std::unique_ptr<WavReader>(new WavReader(FLAGS_i));
std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl;
std::unique_ptr<WavReader> reverse_in_file;
std::unique_ptr<WavWriter> reverse_out_file;
if (!FLAGS_ri.empty()) {
reverse_in_file.reset(new WavReader(FLAGS_ri));
reverse_out_file.reset(new WavWriter(
FLAGS_ro,
FLAGS_rev_out_sample_rate,
static_cast<size_t>(FLAGS_rev_out_channels)));
std::cout << FLAGS_ri << ": "
<< reverse_in_file->FormatAsString() << std::endl;
std::cout << FLAGS_ro << ": "
<< reverse_out_file->FormatAsString() << std::endl;
}
processor.reset(new WavFileProcessor(std::move(ap),
std::move(in_file),
std::move(out_file),
std::move(reverse_in_file),
std::move(reverse_out_file)));
processor.reset(new WavFileProcessor(std::move(ap), std::move(in_file),
std::move(out_file)));
} else {
processor.reset(new AecDumpFileProcessor(