Move out typing detection to its own class.
This will allow an embedder to use it directly. It also adds inertia (hangover time) between updates of the reported detection status to the algorithm, controlled by a parameter. That is usually desired, and this way a consumer of the class doesn't have to implement it. (VoiceEngine will leave it at 1, which results in the same behavior as before, and keep controlling the hangover itself.) R=andrew@webrtc.org, niklas.enbom@webrtc.org, xians@webrtc.org Review URL: https://webrtc-codereview.appspot.com/6219004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@5462 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@ -69,6 +69,8 @@
|
||||
'noise_suppression_impl.h',
|
||||
'processing_component.cc',
|
||||
'processing_component.h',
|
||||
'typing_detection.cc',
|
||||
'typing_detection.h',
|
||||
'utility/delay_estimator.c',
|
||||
'utility/delay_estimator.h',
|
||||
'utility/delay_estimator_internal.h',
|
||||
|
90
webrtc/modules/audio_processing/typing_detection.cc
Normal file
90
webrtc/modules/audio_processing/typing_detection.cc
Normal file
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/typing_detection.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates a detector with all running state cleared and the default tuning
// values installed. The tuning values can be changed later through
// SetParameters().
TypingDetection::TypingDetection()
    // Running state, updated by Process().
    : time_active_(0),
      time_since_last_typing_(0),
      penalty_counter_(0),
      counter_since_last_detection_update_(0),
      detection_to_report_(false),
      new_detection_to_report_(false),
      // Default tuning values.
      time_window_(10),
      cost_per_typing_(100),
      reporting_threshold_(300),
      penalty_decay_(1),
      type_event_delay_(2),
      // A period of 1 refreshes the reported status on every Process() call.
      report_detection_update_period_(1) {
}
|
||||
|
||||
// Nothing to release: the members are plain int counters and bool flags.
TypingDetection::~TypingDetection() {}
|
||||
|
||||
bool TypingDetection::Process(bool key_pressed, bool vad_activity) {
|
||||
if (vad_activity)
|
||||
time_active_++;
|
||||
else
|
||||
time_active_ = 0;
|
||||
|
||||
// Keep track if time since last typing event
|
||||
if (key_pressed)
|
||||
time_since_last_typing_ = 0;
|
||||
else
|
||||
++time_since_last_typing_;
|
||||
|
||||
if (time_since_last_typing_ < type_event_delay_ &&
|
||||
vad_activity &&
|
||||
time_active_ < time_window_) {
|
||||
penalty_counter_ += cost_per_typing_;
|
||||
if (penalty_counter_ > reporting_threshold_)
|
||||
new_detection_to_report_ = true;
|
||||
}
|
||||
|
||||
if (penalty_counter_ > 0)
|
||||
penalty_counter_ -= penalty_decay_;
|
||||
|
||||
if (++counter_since_last_detection_update_ ==
|
||||
report_detection_update_period_) {
|
||||
detection_to_report_ = new_detection_to_report_;
|
||||
new_detection_to_report_ = false;
|
||||
counter_since_last_detection_update_ = 0;
|
||||
}
|
||||
|
||||
return detection_to_report_;
|
||||
}
|
||||
|
||||
int TypingDetection::TimeSinceLastDetectionInSeconds() {
|
||||
// Round to whole seconds.
|
||||
return (time_since_last_typing_ + 50) / 100;
|
||||
}
|
||||
|
||||
void TypingDetection::SetParameters(int time_window,
|
||||
int cost_per_typing,
|
||||
int reporting_threshold,
|
||||
int penalty_decay,
|
||||
int type_event_delay,
|
||||
int report_detection_update_period) {
|
||||
if (time_window) time_window_ = time_window;
|
||||
|
||||
if (cost_per_typing) cost_per_typing_ = cost_per_typing;
|
||||
|
||||
if (reporting_threshold) reporting_threshold_ = reporting_threshold;
|
||||
|
||||
if (penalty_decay) penalty_decay_ = penalty_decay;
|
||||
|
||||
if (type_event_delay) type_event_delay_ = type_event_delay;
|
||||
|
||||
if (report_detection_update_period)
|
||||
report_detection_update_period_ = report_detection_update_period;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
93
webrtc/modules/audio_processing/typing_detection.h
Normal file
93
webrtc/modules/audio_processing/typing_detection.h
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Detects typing noise from key press events combined with voice activity.
class TypingDetection {
 public:
  TypingDetection();
  virtual ~TypingDetection();

  // Runs the detection algorithm and shall be called every 10 ms. The return
  // value is true when typing is detected and false otherwise, refreshed
  // according to the update period configured through SetParameters(); see
  // the description of |report_detection_update_period_| below.
  bool Process(bool key_pressed, bool vad_activity);

  // Returns the time in seconds since the last detection.
  int TimeSinceLastDetectionInSeconds();

  // Configures the algorithm. Passing 0 for a parameter keeps its current
  // value. The corresponding member variables below describe what each
  // parameter controls.
  void SetParameters(int time_window,
                     int cost_per_typing,
                     int reporting_threshold,
                     int penalty_decay,
                     int type_event_delay,
                     int report_detection_update_period);

 private:
  // Running state of the algorithm, updated by Process().
  int time_active_;
  int time_since_last_typing_;
  int penalty_counter_;

  // Number of Process() calls since the reported detection status was last
  // refreshed. See also |report_detection_update_period_|.
  int counter_since_last_detection_update_;

  // The status handed back by Process(). It is refreshed on every
  // |report_detection_update_period_|-th call to Process().
  bool detection_to_report_;

  // The value |detection_to_report_| takes on at its next refresh.
  bool new_detection_to_report_;

  // Settable threshold values.

  // How many 10 ms slots are accepted to count as a hit.
  int time_window_;

  // Penalty added whenever typing and voice activity coincide.
  int cost_per_typing_;

  // Threshold that |penalty_counter_| is compared against.
  int reporting_threshold_;

  // Amount by which |penalty_counter_| is reduced every 10 ms.
  int penalty_decay_;

  // Maximum age of a typing event that is still taken into account.
  int type_event_delay_;

  // Settable update period.

  // How many 10 ms slots pass between refreshes of the detection status that
  // Process() returns. This adds inertia to the algorithm, which is usually
  // desirable, and saves consumers of the class from implementing it
  // themselves.
  // With a value of 1, every call to Process() returns the detection status
  // of that very 10 ms slot.
  // With a value of N (N > 1), the status returned from Process() stays the
  // same until Process() has been called N times. It then becomes true if at
  // least one of those N calls detected typing in its 10 ms slot, and false
  // if none of them did. That status is in turn kept until another N calls
  // have been made.
  int report_detection_update_period_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
|
@ -83,6 +83,8 @@ void MyObserver::CallbackOnError(int channel, int err_code) {
|
||||
// Add printf for other error codes here
|
||||
if (err_code == VE_TYPING_NOISE_WARNING) {
|
||||
printf(" TYPING NOISE DETECTED \n");
|
||||
} else if (err_code == VE_TYPING_NOISE_OFF_WARNING) {
|
||||
printf(" TYPING NOISE OFF DETECTED \n");
|
||||
} else if (err_code == VE_RECEIVE_PACKET_TIMEOUT) {
|
||||
printf(" RECEIVE PACKET TIMEOUT \n");
|
||||
} else if (err_code == VE_PACKET_RECEIPT_RESTARTED) {
|
||||
@ -456,7 +458,7 @@ void RunTest(std::string out_path) {
|
||||
printf("%i. Toggle microphone mute \n", option_index++);
|
||||
printf("%i. Toggle on hold status \n", option_index++);
|
||||
printf("%i. Get last error code \n", option_index++);
|
||||
printf("%i. Toggle typing detection (for Mac/Windows only) \n",
|
||||
printf("%i. Toggle typing detection \n",
|
||||
option_index++);
|
||||
printf("%i. Record a PCM file \n", option_index++);
|
||||
printf("%i. Play a previously recorded PCM file locally \n",
|
||||
|
@ -50,14 +50,16 @@ TransmitMixer::OnPeriodicProcess()
|
||||
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
|
||||
"TransmitMixer::OnPeriodicProcess() => "
|
||||
"CallbackOnError(VE_TYPING_NOISE_WARNING)");
|
||||
_voiceEngineObserverPtr->CallbackOnError(-1,
|
||||
_voiceEngineObserverPtr->CallbackOnError(
|
||||
-1,
|
||||
VE_TYPING_NOISE_WARNING);
|
||||
} else {
|
||||
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, -1),
|
||||
"TransmitMixer::OnPeriodicProcess() => "
|
||||
"CallbackOnError(VE_TYPING_NOISE_OFF_WARNING)");
|
||||
_voiceEngineObserverPtr->CallbackOnError(
|
||||
-1, VE_TYPING_NOISE_OFF_WARNING);
|
||||
-1,
|
||||
VE_TYPING_NOISE_OFF_WARNING);
|
||||
}
|
||||
}
|
||||
_typingNoiseWarningPending = false;
|
||||
@ -194,16 +196,8 @@ TransmitMixer::TransmitMixer(uint32_t instanceId) :
|
||||
_critSect(*CriticalSectionWrapper::CreateCriticalSection()),
|
||||
_callbackCritSect(*CriticalSectionWrapper::CreateCriticalSection()),
|
||||
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
|
||||
_timeActive(0),
|
||||
_timeSinceLastTyping(0),
|
||||
_penaltyCounter(0),
|
||||
_typingNoiseWarningPending(false),
|
||||
_typingNoiseDetected(false),
|
||||
_timeWindow(10), // 10ms slots accepted to count as a hit
|
||||
_costPerTyping(100), // Penalty added for a typing + activity coincide
|
||||
_reportingThreshold(300), // Threshold for _penaltyCounter
|
||||
_penaltyDecay(1), // how much we reduce _penaltyCounter every 10 ms.
|
||||
_typeEventDelay(2), // how "old" event we check for
|
||||
#endif
|
||||
_saturationWarning(false),
|
||||
_instanceId(instanceId),
|
||||
@ -1349,55 +1343,25 @@ void TransmitMixer::ProcessAudio(int delay_ms, int clock_drift,
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
|
||||
int TransmitMixer::TypingDetection(bool keyPressed)
|
||||
void TransmitMixer::TypingDetection(bool keyPressed)
|
||||
{
|
||||
|
||||
// We let the VAD determine if we're using this feature or not.
|
||||
if (_audioFrame.vad_activity_ == AudioFrame::kVadUnknown)
|
||||
{
|
||||
return (0);
|
||||
if (_audioFrame.vad_activity_ == AudioFrame::kVadUnknown) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (_audioFrame.vad_activity_ == AudioFrame::kVadActive)
|
||||
_timeActive++;
|
||||
else
|
||||
_timeActive = 0;
|
||||
|
||||
// Keep track if time since last typing event
|
||||
if (keyPressed)
|
||||
{
|
||||
_timeSinceLastTyping = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
++_timeSinceLastTyping;
|
||||
}
|
||||
|
||||
if ((_timeSinceLastTyping < _typeEventDelay)
|
||||
&& (_audioFrame.vad_activity_ == AudioFrame::kVadActive)
|
||||
&& (_timeActive < _timeWindow))
|
||||
{
|
||||
_penaltyCounter += _costPerTyping;
|
||||
if (_penaltyCounter > _reportingThreshold)
|
||||
{
|
||||
// Triggers a callback in OnPeriodicProcess().
|
||||
bool vadActive = _audioFrame.vad_activity_ == AudioFrame::kVadActive;
|
||||
if (_typingDetection.Process(keyPressed, vadActive)) {
|
||||
_typingNoiseWarningPending = true;
|
||||
_typingNoiseDetected = true;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// If there is already a warning pending, do not change the state.
|
||||
// Otherwise sets a warning pending if noise is off now but previously on.
|
||||
// Otherwise set a warning pending if last callback was for noise detected.
|
||||
if (!_typingNoiseWarningPending && _typingNoiseDetected) {
|
||||
// Triggers a callback in OnPeriodicProcess().
|
||||
_typingNoiseWarningPending = true;
|
||||
_typingNoiseDetected = false;
|
||||
}
|
||||
|
||||
if (_penaltyCounter > 0)
|
||||
_penaltyCounter-=_penaltyDecay;
|
||||
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1412,10 +1376,8 @@ int TransmitMixer::TimeSinceLastTyping(int &seconds)
|
||||
{
|
||||
// We check in VoEAudioProcessingImpl that this is only called when
|
||||
// typing detection is active.
|
||||
|
||||
// Round to whole seconds
|
||||
seconds = (_timeSinceLastTyping + 50) / 100;
|
||||
return(0);
|
||||
seconds = _typingDetection.TimeSinceLastDetectionInSeconds();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1426,19 +1388,13 @@ int TransmitMixer::SetTypingDetectionParameters(int timeWindow,
|
||||
int penaltyDecay,
|
||||
int typeEventDelay)
|
||||
{
|
||||
if(timeWindow != 0)
|
||||
_timeWindow = timeWindow;
|
||||
if(costPerTyping != 0)
|
||||
_costPerTyping = costPerTyping;
|
||||
if(reportingThreshold != 0)
|
||||
_reportingThreshold = reportingThreshold;
|
||||
if(penaltyDecay != 0)
|
||||
_penaltyDecay = penaltyDecay;
|
||||
if(typeEventDelay != 0)
|
||||
_typeEventDelay = typeEventDelay;
|
||||
|
||||
|
||||
return(0);
|
||||
_typingDetection.SetParameters(timeWindow,
|
||||
costPerTyping,
|
||||
reportingThreshold,
|
||||
penaltyDecay,
|
||||
typeEventDelay,
|
||||
0);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "webrtc/common_audio/resampler/include/push_resampler.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_processing/typing_detection.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/modules/utility/interface/file_player.h"
|
||||
#include "webrtc/modules/utility/interface/file_recorder.h"
|
||||
@ -186,7 +187,7 @@ private:
|
||||
void ProcessAudio(int delay_ms, int clock_drift, int current_mic_level);
|
||||
|
||||
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
|
||||
int TypingDetection(bool keyPressed);
|
||||
void TypingDetection(bool keyPressed);
|
||||
#endif
|
||||
|
||||
// uses
|
||||
@ -215,19 +216,9 @@ private:
|
||||
CriticalSectionWrapper& _callbackCritSect;
|
||||
|
||||
#ifdef WEBRTC_VOICE_ENGINE_TYPING_DETECTION
|
||||
int32_t _timeActive;
|
||||
int32_t _timeSinceLastTyping;
|
||||
int32_t _penaltyCounter;
|
||||
webrtc::TypingDetection _typingDetection;
|
||||
bool _typingNoiseWarningPending;
|
||||
bool _typingNoiseDetected;
|
||||
|
||||
// Tunable treshold values
|
||||
int _timeWindow; // nr of10ms slots accepted to count as a hit.
|
||||
int _costPerTyping; // Penalty added for a typing + activity coincide.
|
||||
int _reportingThreshold; // Threshold for _penaltyCounter.
|
||||
int _penaltyDecay; // How much we reduce _penaltyCounter every 10 ms.
|
||||
int _typeEventDelay; // How old typing events we allow
|
||||
|
||||
#endif
|
||||
bool _saturationWarning;
|
||||
|
||||
|
Reference in New Issue
Block a user