Move out typing detection to its own class.

This will allow an embedder to use it directly.

Add inertia/hangover time between updates of the reported detection status to the algorithm, controlled by a parameter. That is usually desired, and this way a consumer of
the class doesn't have to implement it. (VoiceEngine will set it to 1, which results in the same behavior as before, and will keep controlling the hangover itself.)

R=andrew@webrtc.org, niklas.enbom@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/6219004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5462 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
henrikg@webrtc.org
2014-01-30 09:50:46 +00:00
parent cf1b51b6fb
commit c693704cc2
6 changed files with 226 additions and 92 deletions

View File

@ -69,6 +69,8 @@
'noise_suppression_impl.h', 'noise_suppression_impl.h',
'processing_component.cc', 'processing_component.cc',
'processing_component.h', 'processing_component.h',
'typing_detection.cc',
'typing_detection.h',
'utility/delay_estimator.c', 'utility/delay_estimator.c',
'utility/delay_estimator.h', 'utility/delay_estimator.h',
'utility/delay_estimator_internal.h', 'utility/delay_estimator_internal.h',

View File

@ -0,0 +1,90 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/typing_detection.h"
namespace webrtc {
// Creates a detector with cleared running state and the default tuning
// values. The tuning values can be overridden later with SetParameters().
TypingDetection::TypingDetection()
    // Running state, all cleared.
    : time_active_(0),
      time_since_last_typing_(0),
      penalty_counter_(0),
      counter_since_last_detection_update_(0),
      detection_to_report_(false),
      new_detection_to_report_(false),
      // Default tuning values.
      time_window_(10),
      cost_per_typing_(100),
      reporting_threshold_(300),
      penalty_decay_(1),
      type_event_delay_(2),
      // A period of 1 refreshes the reported status on every Process() call.
      report_detection_update_period_(1) {
}
// Nothing to release: the class only holds plain int and bool members.
TypingDetection::~TypingDetection() {
}
// Runs one step of the typing detection algorithm. Expected to be called once
// per 10 ms slot.
//
// key_pressed:  true if a key press was registered for this slot.
// vad_activity: true if voice activity was detected for this slot.
//
// Returns the detection status currently being reported. The reported status
// is refreshed once every |report_detection_update_period_| calls; between
// refreshes the previously reported value is returned unchanged.
bool TypingDetection::Process(bool key_pressed, bool vad_activity) {
  // Count consecutive slots with voice activity; any inactive slot resets it.
  if (vad_activity)
    ++time_active_;
  else
    time_active_ = 0;

  // Keep track of the time since the last typing event.
  if (key_pressed)
    time_since_last_typing_ = 0;
  else
    ++time_since_last_typing_;

  // A "hit" is a recent key press coinciding with voice activity that started
  // only a short while ago. Each hit adds to the penalty; once the penalty
  // exceeds the threshold, typing is flagged for the current report period.
  if (time_since_last_typing_ < type_event_delay_ &&
      vad_activity &&
      time_active_ < time_window_) {
    penalty_counter_ += cost_per_typing_;
    if (penalty_counter_ > reporting_threshold_)
      new_detection_to_report_ = true;
  }

  // Let the accumulated penalty decay over time.
  if (penalty_counter_ > 0)
    penalty_counter_ -= penalty_decay_;

  // Use >= rather than ==: SetParameters() may change the period to a value
  // at or below the current counter, in which case an equality test would
  // never match, the reported status would freeze and the counter would keep
  // growing towards signed overflow.
  if (++counter_since_last_detection_update_ >=
      report_detection_update_period_) {
    detection_to_report_ = new_detection_to_report_;
    new_detection_to_report_ = false;
    counter_since_last_detection_update_ = 0;
  }

  return detection_to_report_;
}
// Returns the time since the last typing event (key press seen by Process()),
// rounded to the nearest whole second.
int TypingDetection::TimeSinceLastDetectionInSeconds() {
  // |time_since_last_typing_| advances once per Process() call, which is
  // expected every 10 ms, i.e. 100 ticks per second. Adding half a second's
  // worth of ticks before the integer division rounds to nearest.
  const int kTicksPerSecond = 100;
  const int kHalfSecondInTicks = 50;
  const int rounded_ticks = time_since_last_typing_ + kHalfSecondInTicks;
  return rounded_ticks / kTicksPerSecond;
}
// Updates the tuning parameters of the algorithm.
//
// Each argument replaces the corresponding member only when it is strictly
// positive. A value of 0 leaves the current setting unchanged (the documented
// contract); negative values are ignored as well, since none of the
// parameters has a meaningful negative setting (e.g. a negative decay would
// make the penalty grow, and a negative update period would prevent the
// reported status from being refreshed).
//
// time_window:                    number of 10 ms slots accepted as a hit.
// cost_per_typing:                penalty added per typing/activity coincide.
// reporting_threshold:            threshold for the penalty counter.
// penalty_decay:                  penalty reduction per 10 ms slot.
// type_event_delay:               how old typing events are allowed to be.
// report_detection_update_period: number of Process() calls between updates
//                                 of the reported detection status.
void TypingDetection::SetParameters(int time_window,
                                    int cost_per_typing,
                                    int reporting_threshold,
                                    int penalty_decay,
                                    int type_event_delay,
                                    int report_detection_update_period) {
  if (time_window > 0) time_window_ = time_window;

  if (cost_per_typing > 0) cost_per_typing_ = cost_per_typing;

  if (reporting_threshold > 0) reporting_threshold_ = reporting_threshold;

  if (penalty_decay > 0) penalty_decay_ = penalty_decay;

  if (type_event_delay > 0) type_event_delay_ = type_event_delay;

  if (report_detection_update_period > 0)
    report_detection_update_period_ = report_detection_update_period;
}
} // namespace webrtc

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Detects typing noise by combining key-press events with voice activity
// information. Each call to Process() feeds one 10 ms slot into the algorithm:
// key presses that coincide with recently started voice activity add to a
// penalty counter, the counter decays over time, and typing is reported once
// the counter exceeds a threshold.
class TypingDetection {
public:
// Initializes the running state to zero/false and the parameters to their
// default values.
TypingDetection();
virtual ~TypingDetection();
// Run the detection algorithm. Shall be called every 10 ms. Returns true if
// typing is detected, or false if not, based on the update period as set with
// SetParameters(). See |report_detection_update_period_| description below.
bool Process(bool key_pressed, bool vad_activity);
// Gets the time in seconds, rounded to the nearest second, since the last
// typing event, i.e. since Process() was last called with |key_pressed| set to
// true. Note that this is the time since the last key press, not since
// Process() last returned true.
int TimeSinceLastDetectionInSeconds();
// Sets the algorithm parameters. A parameter value of 0 leaves it unchanged.
// See the corresponding member variables below for descriptions.
void SetParameters(int time_window,
int cost_per_typing,
int reporting_threshold,
int penalty_decay,
int type_event_delay,
int report_detection_update_period);
private:
// Number of consecutive Process() calls with voice activity. Reset to 0 by a
// call without voice activity.
int time_active_;
// Number of Process() calls since the last call with a key press. Reset to 0
// by a call with |key_pressed| set.
int time_since_last_typing_;
// Accumulated penalty. Increased by |cost_per_typing_| on each hit and reduced
// by |penalty_decay_| on each Process() call while it is positive.
int penalty_counter_;
// Counter since last time the detection status reported by Process() was
// updated. See also |report_detection_update_period_|.
int counter_since_last_detection_update_;
// The detection status to report. Updated every
// |report_detection_update_period_| call to Process().
bool detection_to_report_;
// What |detection_to_report_| should be set to next time it is updated.
bool new_detection_to_report_;
// Settable threshold values.
// Number of 10 ms slots accepted to count as a hit.
int time_window_;
// Penalty added for a typing + activity coincide.
int cost_per_typing_;
// Threshold for |penalty_counter_|.
int reporting_threshold_;
// How much we reduce |penalty_counter_| every 10 ms.
int penalty_decay_;
// How old typing events we allow.
int type_event_delay_;
// Settable update period.
// Number of 10 ms slots between each update of the detection status returned
// by Process(). This inertia added to the algorithm is usually desirable and
// provided so that consumers of the class don't have to implement that
// themselves if they don't wish.
// If set to 1, each call to Process() will return the detection status for
// that 10 ms slot.
// If set to N (where N > 1), the detection status returned from Process()
// will remain the same until Process() has been called N times. Then, if none
// of the last N calls to Process() has detected typing for each respective
// 10 ms slot, Process() will return false. If at least one of the last N
// calls has detected typing, Process() will return true. And that returned
// status will then remain the same until the next N calls have been done.
int report_detection_update_period_;
};
} // namespace webrtc
#endif // #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_

View File

@ -83,6 +83,8 @@ void MyObserver::CallbackOnError(int channel, int err_code) {
// Add printf for other error codes here // Add printf for other error codes here
if (err_code == VE_TYPING_NOISE_WARNING) { if (err_code == VE_TYPING_NOISE_WARNING) {
printf(" TYPING NOISE DETECTED \n"); printf(" TYPING NOISE DETECTED \n");
} else if (err_code == VE_TYPING_NOISE_OFF_WARNING) {
printf(" TYPING NOISE OFF DETECTED \n");
} else if (err_code == VE_RECEIVE_PACKET_TIMEOUT) { } else if (err_code == VE_RECEIVE_PACKET_TIMEOUT) {
printf(" RECEIVE PACKET TIMEOUT \n"); printf(" RECEIVE PACKET TIMEOUT \n");
} else if (err_code == VE_PACKET_RECEIPT_RESTARTED) { } else if (err_code == VE_PACKET_RECEIPT_RESTARTED) {
@ -456,7 +458,7 @@ void RunTest(std::string out_path) {
printf("%i. Toggle microphone mute \n", option_index++); printf("%i. Toggle microphone mute \n", option_index++);
printf("%i. Toggle on hold status \n", option_index++); printf("%i. Toggle on hold status \n", option_index++);
printf("%i. Get last error code \n", option_index++); printf("%i. Get last error code \n", option_index++);
printf("%i. Toggle typing detection (for Mac/Windows only) \n", printf("%i. Toggle typing detection \n",
option_index++); option_index++);
printf("%i. Record a PCM file \n", option_index++); printf("%i. Record a PCM file \n", option_index++);
printf("%i. Play a previously recorded PCM file locally \n", printf("%i. Play a previously recorded PCM file locally \n",

View File

@ -47,17 +47,19 @@ TransmitMixer::OnPeriodicProcess()
if (_voiceEngineObserverPtr) if (_voiceEngineObserverPtr)
{ {
if (_typingNoiseDetected) { if (_typingNoiseDetected) {
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1), WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::OnPeriodicProcess() => " "TransmitMixer::OnPeriodicProcess() => "
"CallbackOnError(VE_TYPING_NOISE_WARNING)"); "CallbackOnError(VE_TYPING_NOISE_WARNING)");
_voiceEngineObserverPtr->CallbackOnError(-1, _voiceEngineObserverPtr->CallbackOnError(
VE_TYPING_NOISE_WARNING); -1,
VE_TYPING_NOISE_WARNING);
} else { } else {
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1), WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::OnPeriodicProcess() => " "TransmitMixer::OnPeriodicProcess() => "
"CallbackOnError(VE_TYPING_NOISE_OFF_WARNING)"); "CallbackOnError(VE_TYPING_NOISE_OFF_WARNING)");
_voiceEngineObserverPtr->CallbackOnError( _voiceEngineObserverPtr->CallbackOnError(
-1, VE_TYPING_NOISE_OFF_WARNING); -1,
VE_TYPING_NOISE_OFF_WARNING);
} }
} }
_typingNoiseWarningPending = false; _typingNoiseWarningPending = false;
@ -194,16 +196,8 @@ TransmitMixer::TransmitMixer(uint32_t instanceId) :
_critSect(*CriticalSectionWrapper::CreateCriticalSection()), _critSect(*CriticalSectionWrapper::CreateCriticalSection()),
_callbackCritSect(*CriticalSectionWrapper::CreateCriticalSection()), _callbackCritSect(*CriticalSectionWrapper::CreateCriticalSection()),
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION #ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
_timeActive(0),
_timeSinceLastTyping(0),
_penaltyCounter(0),
_typingNoiseWarningPending(false), _typingNoiseWarningPending(false),
_typingNoiseDetected(false), _typingNoiseDetected(false),
_timeWindow(10), // 10ms slots accepted to count as a hit
_costPerTyping(100), // Penalty added for a typing + activity coincide
_reportingThreshold(300), // Threshold for _penaltyCounter
_penaltyDecay(1), // how much we reduce _penaltyCounter every 10 ms.
_typeEventDelay(2), // how "old" event we check for
#endif #endif
_saturationWarning(false), _saturationWarning(false),
_instanceId(instanceId), _instanceId(instanceId),
@ -1349,55 +1343,25 @@ void TransmitMixer::ProcessAudio(int delay_ms, int clock_drift,
} }
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION #ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
int TransmitMixer::TypingDetection(bool keyPressed) void TransmitMixer::TypingDetection(bool keyPressed)
{ {
// We let the VAD determine if we're using this feature or not.
if (_audioFrame.vad_activity_ == AudioFrame::kVadUnknown) {
return;
}
// We let the VAD determine if we're using this feature or not. bool vadActive = _audioFrame.vad_activity_ == AudioFrame::kVadActive;
if (_audioFrame.vad_activity_ == AudioFrame::kVadUnknown) if (_typingDetection.Process(keyPressed, vadActive)) {
{ _typingNoiseWarningPending = true;
return (0); _typingNoiseDetected = true;
} } else {
if (_audioFrame.vad_activity_ == AudioFrame::kVadActive)
_timeActive++;
else
_timeActive = 0;
// Keep track if time since last typing event
if (keyPressed)
{
_timeSinceLastTyping = 0;
}
else
{
++_timeSinceLastTyping;
}
if ((_timeSinceLastTyping < _typeEventDelay)
&& (_audioFrame.vad_activity_ == AudioFrame::kVadActive)
&& (_timeActive < _timeWindow))
{
_penaltyCounter += _costPerTyping;
if (_penaltyCounter > _reportingThreshold)
{
// Triggers a callback in OnPeriodicProcess().
_typingNoiseWarningPending = true;
_typingNoiseDetected = true;
}
}
// If there is already a warning pending, do not change the state. // If there is already a warning pending, do not change the state.
// Otherwise sets a warning pending if noise is off now but previously on. // Otherwise set a warning pending if last callback was for noise detected.
if (!_typingNoiseWarningPending && _typingNoiseDetected) { if (!_typingNoiseWarningPending && _typingNoiseDetected) {
// Triggers a callback in OnPeriodicProcess().
_typingNoiseWarningPending = true; _typingNoiseWarningPending = true;
_typingNoiseDetected = false; _typingNoiseDetected = false;
} }
}
if (_penaltyCounter > 0)
_penaltyCounter-=_penaltyDecay;
return (0);
} }
#endif #endif
@ -1410,12 +1374,10 @@ int TransmitMixer::GetMixingFrequency()
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION #ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
int TransmitMixer::TimeSinceLastTyping(int &seconds) int TransmitMixer::TimeSinceLastTyping(int &seconds)
{ {
// We check in VoEAudioProcessingImpl that this is only called when // We check in VoEAudioProcessingImpl that this is only called when
// typing detection is active. // typing detection is active.
seconds = _typingDetection.TimeSinceLastDetectionInSeconds();
// Round to whole seconds return 0;
seconds = (_timeSinceLastTyping + 50) / 100;
return(0);
} }
#endif #endif
@ -1426,19 +1388,13 @@ int TransmitMixer::SetTypingDetectionParameters(int timeWindow,
int penaltyDecay, int penaltyDecay,
int typeEventDelay) int typeEventDelay)
{ {
if(timeWindow != 0) _typingDetection.SetParameters(timeWindow,
_timeWindow = timeWindow; costPerTyping,
if(costPerTyping != 0) reportingThreshold,
_costPerTyping = costPerTyping; penaltyDecay,
if(reportingThreshold != 0) typeEventDelay,
_reportingThreshold = reportingThreshold; 0);
if(penaltyDecay != 0) return 0;
_penaltyDecay = penaltyDecay;
if(typeEventDelay != 0)
_typeEventDelay = typeEventDelay;
return(0);
} }
#endif #endif

View File

@ -13,6 +13,7 @@
#include "webrtc/common_audio/resampler/include/push_resampler.h" #include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_types.h" #include "webrtc/common_types.h"
#include "webrtc/modules/audio_processing/typing_detection.h"
#include "webrtc/modules/interface/module_common_types.h" #include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/file_player.h" #include "webrtc/modules/utility/interface/file_player.h"
#include "webrtc/modules/utility/interface/file_recorder.h" #include "webrtc/modules/utility/interface/file_recorder.h"
@ -186,7 +187,7 @@ private:
void ProcessAudio(int delay_ms, int clock_drift, int current_mic_level); void ProcessAudio(int delay_ms, int clock_drift, int current_mic_level);
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION #ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
int TypingDetection(bool keyPressed); void TypingDetection(bool keyPressed);
#endif #endif
// uses // uses
@ -215,19 +216,9 @@ private:
CriticalSectionWrapper& _callbackCritSect; CriticalSectionWrapper& _callbackCritSect;
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION #ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
int32_t _timeActive; webrtc::TypingDetection _typingDetection;
int32_t _timeSinceLastTyping;
int32_t _penaltyCounter;
bool _typingNoiseWarningPending; bool _typingNoiseWarningPending;
bool _typingNoiseDetected; bool _typingNoiseDetected;
// Tunable treshold values
int _timeWindow; // nr of10ms slots accepted to count as a hit.
int _costPerTyping; // Penalty added for a typing + activity coincide.
int _reportingThreshold; // Threshold for _penaltyCounter.
int _penaltyDecay; // How much we reduce _penaltyCounter every 10 ms.
int _typeEventDelay; // How old typing events we allow
#endif #endif
bool _saturationWarning; bool _saturationWarning;