Revert "Add IntelligibilityEnhancer support to audioproc_float"
Revert reason: I unintentionally included a patch while rebasing, and it is breaking the bots.

This reverts commit 98c69a0ee785adeb9d95fffeb55cdb6cedbe82c6.

BUG=
Review URL: https://codereview.webrtc.org/1837313002 .
Cr-Commit-Position: refs/heads/master@{#12148}
This commit is contained in:
@ -29,8 +29,8 @@ const int kWindowSizeMs = 16;
|
||||
const int kChunkSizeMs = 10; // Size provided by APM.
|
||||
const float kClipFreqKhz = 0.2f;
|
||||
const float kKbdAlpha = 1.5f;
|
||||
const double kLambdaBot = -1.0 / (1 << 30); // Extreme values in bisection
|
||||
const double kLambdaTop = -1e-5 / (1 << 30); // search for lambda.
|
||||
const float kLambdaBot = -1.0f; // Extreme values in bisection
|
||||
const float kLambdaTop = -1e-5f; // search for lambda.
|
||||
const float kVoiceProbabilityThreshold = 0.02f;
|
||||
// Number of chunks after voice activity which is still considered speech.
|
||||
const size_t kSpeechOffsetDelay = 80;
|
||||
@ -162,12 +162,12 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target) {
|
||||
|
||||
const float reciprocal_power_target =
|
||||
1.f / (power_target + std::numeric_limits<float>::epsilon());
|
||||
double lambda_bot = kLambdaBot;
|
||||
double lambda_top = kLambdaTop;
|
||||
float lambda_bot = kLambdaBot;
|
||||
float lambda_top = kLambdaTop;
|
||||
float power_ratio = 2.f; // Ratio of achieved power to target power.
|
||||
int iters = 0;
|
||||
while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
|
||||
const double lambda = (lambda_bot + lambda_top) / 2.0;
|
||||
const float lambda = (lambda_bot + lambda_top) / 2.f;
|
||||
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
|
||||
const float power =
|
||||
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
|
||||
@ -267,7 +267,7 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
|
||||
return filter_bank;
|
||||
}
|
||||
|
||||
void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
|
||||
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
|
||||
size_t start_freq,
|
||||
float* sols) {
|
||||
const float kMinPower = 1e-5f;
|
||||
@ -284,19 +284,19 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(double lambda,
|
||||
if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
|
||||
sols[n] = 1.f;
|
||||
} else {
|
||||
const double gamma0 = 0.5 * kRho * pow_x0[n] * pow_n0[n] +
|
||||
const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
|
||||
lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
|
||||
const double beta0 =
|
||||
lambda * pow_x0[n] * (2.0 - kRho) * pow_x0[n] * pow_n0[n];
|
||||
const double alpha0 =
|
||||
lambda * pow_x0[n] * (1.0 - kRho) * pow_x0[n] * pow_x0[n];
|
||||
RTC_DCHECK_LT(alpha0, 0.0);
|
||||
const float beta0 =
|
||||
lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
|
||||
const float alpha0 =
|
||||
lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
|
||||
RTC_DCHECK_LT(alpha0, 0.f);
|
||||
// The quadratic equation should always have real roots, but to guard
|
||||
// against numerical errors we limit it to a minimum of zero.
|
||||
sols[n] = std::max(
|
||||
0.0, (-beta0 - std::sqrt(std::max(
|
||||
0.0, beta0 * beta0 - 4.0 * alpha0 * gamma0))) /
|
||||
(2.0 * alpha0));
|
||||
0.f, (-beta0 - std::sqrt(std::max(
|
||||
0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
|
||||
(2.f * alpha0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -71,7 +71,7 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
|
||||
|
||||
// Analytically solves quadratic for optimal gains given |lambda|.
|
||||
// Negative gains are set to 0. Stores the results in |sols|.
|
||||
void SolveForGainsGivenLambda(double lambda, size_t start_freq, float* sols);
|
||||
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
|
||||
|
||||
// Returns true if the audio is speech.
|
||||
bool IsSpeech(const float* audio);
|
||||
|
||||
@ -56,6 +56,7 @@ void void_main(int argc, char* argv[]) {
|
||||
noise_file.num_channels());
|
||||
while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
|
||||
noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
|
||||
FloatS16ToFloat(in.data(), in.size(), in.data());
|
||||
FloatS16ToFloat(noise.data(), noise.size(), noise.data());
|
||||
Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
|
||||
in_buf.channels());
|
||||
@ -69,6 +70,7 @@ void void_main(int argc, char* argv[]) {
|
||||
in_file.num_channels());
|
||||
Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
|
||||
in.data());
|
||||
FloatToFloatS16(in.data(), in.size(), in.data());
|
||||
out_file.WriteSamples(in.data(), in.size());
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,15 +177,17 @@ std::vector<float> NoiseSuppressionImpl::NoiseEstimate() {
|
||||
rtc::CritScope cs(crit_);
|
||||
std::vector<float> noise_estimate;
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
const float kNormalizationFactor = 1.f / (1 << 15);
|
||||
noise_estimate.assign(WebRtcNs_num_freq(), 0.f);
|
||||
for (auto& suppressor : suppressors_) {
|
||||
const float* noise = WebRtcNs_noise_estimate(suppressor->state());
|
||||
for (size_t i = 0; i < noise_estimate.size(); ++i) {
|
||||
noise_estimate[i] += noise[i] / suppressors_.size();
|
||||
noise_estimate[i] +=
|
||||
kNormalizationFactor * noise[i] / suppressors_.size();
|
||||
}
|
||||
}
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
const float kNormalizationFactor = 1.f / (1 << 9);
|
||||
const float kNormalizationFactor = 1.f / (1 << 23);
|
||||
noise_estimate.assign(WebRtcNsx_num_freq(), 0.f);
|
||||
for (auto& suppressor : suppressors_) {
|
||||
const uint32_t* noise = WebRtcNsx_noise_estimate(suppressor->state());
|
||||
|
||||
@ -42,39 +42,14 @@ ChannelBuffer<float> GetChannelBuffer(const WavFile& file) {
|
||||
|
||||
WavFileProcessor::WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
|
||||
std::unique_ptr<WavReader> in_file,
|
||||
std::unique_ptr<WavWriter> out_file,
|
||||
std::unique_ptr<WavReader> reverse_in_file,
|
||||
std::unique_ptr<WavWriter> reverse_out_file)
|
||||
std::unique_ptr<WavWriter> out_file)
|
||||
: ap_(std::move(ap)),
|
||||
in_buf_(GetChannelBuffer(*in_file)),
|
||||
out_buf_(GetChannelBuffer(*out_file)),
|
||||
input_config_(GetStreamConfig(*in_file)),
|
||||
output_config_(GetStreamConfig(*out_file)),
|
||||
buffer_reader_(std::move(in_file)),
|
||||
buffer_writer_(std::move(out_file)) {
|
||||
if (reverse_in_file) {
|
||||
const WavFile* reverse_out_config;
|
||||
if (reverse_out_file) {
|
||||
reverse_out_config = reverse_out_file.get();
|
||||
} else {
|
||||
reverse_out_config = reverse_in_file.get();
|
||||
}
|
||||
reverse_in_buf_.reset(
|
||||
new ChannelBuffer<float>(GetChannelBuffer(*reverse_in_file)));
|
||||
reverse_out_buf_.reset(
|
||||
new ChannelBuffer<float>(GetChannelBuffer(*reverse_out_config)));
|
||||
reverse_input_config_.reset(
|
||||
new StreamConfig(GetStreamConfig(*reverse_in_file)));
|
||||
reverse_output_config_.reset(
|
||||
new StreamConfig(GetStreamConfig(*reverse_out_config)));
|
||||
reverse_buffer_reader_.reset(
|
||||
new ChannelBufferWavReader(std::move(reverse_in_file)));
|
||||
if (reverse_out_file) {
|
||||
reverse_buffer_writer_.reset(
|
||||
new ChannelBufferWavWriter(std::move(reverse_out_file)));
|
||||
}
|
||||
}
|
||||
}
|
||||
buffer_writer_(std::move(out_file)) {}
|
||||
|
||||
bool WavFileProcessor::ProcessChunk() {
|
||||
if (!buffer_reader_.Read(&in_buf_)) {
|
||||
@ -87,22 +62,6 @@ bool WavFileProcessor::ProcessChunk() {
|
||||
output_config_, out_buf_.channels()));
|
||||
}
|
||||
buffer_writer_.Write(out_buf_);
|
||||
if (reverse_buffer_reader_) {
|
||||
if (!reverse_buffer_reader_->Read(reverse_in_buf_.get())) {
|
||||
return false;
|
||||
}
|
||||
{
|
||||
const auto st = ScopedTimer(mutable_proc_time());
|
||||
RTC_CHECK_EQ(kNoErr,
|
||||
ap_->ProcessReverseStream(reverse_in_buf_->channels(),
|
||||
*reverse_input_config_.get(),
|
||||
*reverse_output_config_.get(),
|
||||
reverse_out_buf_->channels()));
|
||||
}
|
||||
if (reverse_buffer_writer_) {
|
||||
reverse_buffer_writer_->Write(*reverse_out_buf_.get());
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -86,9 +86,7 @@ class WavFileProcessor final : public AudioFileProcessor {
|
||||
// Takes ownership of all parameters.
|
||||
WavFileProcessor(std::unique_ptr<AudioProcessing> ap,
|
||||
std::unique_ptr<WavReader> in_file,
|
||||
std::unique_ptr<WavWriter> out_file,
|
||||
std::unique_ptr<WavReader> reverse_in_file,
|
||||
std::unique_ptr<WavWriter> reverse_out_file);
|
||||
std::unique_ptr<WavWriter> out_file);
|
||||
virtual ~WavFileProcessor() {}
|
||||
|
||||
// Processes one chunk from the WAV input and writes to the WAV output.
|
||||
@ -103,12 +101,6 @@ class WavFileProcessor final : public AudioFileProcessor {
|
||||
const StreamConfig output_config_;
|
||||
ChannelBufferWavReader buffer_reader_;
|
||||
ChannelBufferWavWriter buffer_writer_;
|
||||
std::unique_ptr<ChannelBuffer<float>> reverse_in_buf_;
|
||||
std::unique_ptr<ChannelBuffer<float>> reverse_out_buf_;
|
||||
std::unique_ptr<StreamConfig> reverse_input_config_;
|
||||
std::unique_ptr<StreamConfig> reverse_output_config_;
|
||||
std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_;
|
||||
std::unique_ptr<ChannelBufferWavWriter> reverse_buffer_writer_;
|
||||
};
|
||||
|
||||
// Used to read from an aecdump file and write to a WavWriter.
|
||||
|
||||
@ -42,20 +42,10 @@ DEFINE_string(
|
||||
o,
|
||||
"out.wav",
|
||||
"Name of the output file to write the processed capture stream to.");
|
||||
DEFINE_string(ri, "", "Name of the render input stream file to read from.");
|
||||
DEFINE_string(
|
||||
ro,
|
||||
"out_reverse.wav",
|
||||
"Name of the output file to write the processed render stream to.");
|
||||
DEFINE_int32(out_channels, 1, "Number of output channels.");
|
||||
const bool out_channels_dummy =
|
||||
google::RegisterFlagValidator(&FLAGS_out_channels, &ValidateOutChannels);
|
||||
DEFINE_int32(rev_out_channels, 1, "Number of reverse output channels.");
|
||||
const bool rev_out_channels_dummy =
|
||||
google::RegisterFlagValidator(&FLAGS_rev_out_channels,
|
||||
&ValidateOutChannels);
|
||||
DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz.");
|
||||
DEFINE_int32(rev_out_sample_rate, 48000, "Reverse output sample rate in Hz.");
|
||||
DEFINE_string(mic_positions, "",
|
||||
"Space delimited cartesian coordinates of microphones in meters. "
|
||||
"The coordinates of each point are contiguous. "
|
||||
@ -87,7 +77,8 @@ const char kUsage[] =
|
||||
"an input capture WAV file or protobuf debug dump and writes to an output\n"
|
||||
"WAV file.\n"
|
||||
"\n"
|
||||
"All components are disabled by default.";
|
||||
"All components are disabled by default. If any bi-directional components\n"
|
||||
"are enabled, only debug dump files are permitted.";
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -100,6 +91,15 @@ int main(int argc, char* argv[]) {
|
||||
"An input file must be specified with either -i or -dump.\n");
|
||||
return 1;
|
||||
}
|
||||
if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) {
|
||||
fprintf(stderr, "-aec and -ie require a -dump file.\n");
|
||||
return 1;
|
||||
}
|
||||
if (FLAGS_ie) {
|
||||
fprintf(stderr,
|
||||
"FIXME(ajm): The intelligibility enhancer output is not dumped.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
test::TraceToStderr trace_to_stderr(true);
|
||||
Config config;
|
||||
@ -135,24 +135,8 @@ int main(int argc, char* argv[]) {
|
||||
if (FLAGS_dump.empty()) {
|
||||
auto in_file = std::unique_ptr<WavReader>(new WavReader(FLAGS_i));
|
||||
std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl;
|
||||
std::unique_ptr<WavReader> reverse_in_file;
|
||||
std::unique_ptr<WavWriter> reverse_out_file;
|
||||
if (!FLAGS_ri.empty()) {
|
||||
reverse_in_file.reset(new WavReader(FLAGS_ri));
|
||||
reverse_out_file.reset(new WavWriter(
|
||||
FLAGS_ro,
|
||||
FLAGS_rev_out_sample_rate,
|
||||
static_cast<size_t>(FLAGS_rev_out_channels)));
|
||||
std::cout << FLAGS_ri << ": "
|
||||
<< reverse_in_file->FormatAsString() << std::endl;
|
||||
std::cout << FLAGS_ro << ": "
|
||||
<< reverse_out_file->FormatAsString() << std::endl;
|
||||
}
|
||||
processor.reset(new WavFileProcessor(std::move(ap),
|
||||
std::move(in_file),
|
||||
std::move(out_file),
|
||||
std::move(reverse_in_file),
|
||||
std::move(reverse_out_file)));
|
||||
processor.reset(new WavFileProcessor(std::move(ap), std::move(in_file),
|
||||
std::move(out_file)));
|
||||
|
||||
} else {
|
||||
processor.reset(new AecDumpFileProcessor(
|
||||
|
||||
Reference in New Issue
Block a user