Interrupt Prediction
Description
Interrupt Prediction augments the beginning-of-speech detection of the VAD (voice activity detector). Once the VAD signals the beginning of speech, Interrupt Prediction examines the nature of that speech and determines whether it should be ignored or should interrupt the bot. This avoids false interrupts caused by conversational fillers such as "um hmm", "uh huh", "yeah", etc., which users typically say to show engagement. Previously, these words would cause the bot to stop speaking and break the conversation flow.
Sample Code
# Interrupt Prediction (IP) sample.
#
# This is an excerpt: `self`, `audioFrames` and `vad_API` are not defined here
# and must be supplied by the surrounding application code.

# Point the SDK at the Interrupt Prediction model file.
model_info = krisp_audio.ModelInfo()
model_info.path = self.model_path

# Describe the incoming audio stream and attach the model to the session.
ip_cfg = krisp_audio.IpSessionConfig()
ip_cfg.inputSampleRate = self._int_to_sample_rate(self.audio_stream_info['sample_rate'])
ip_cfg.inputFrameDuration = self._int_to_frame_dur(self.frame_dur)
ip_cfg.modelInfo = model_info

# Create the IP instance that matches the sample format of the stream.
sample_type = self.audio_stream_info['sample_type']
if sample_type == 'FLOAT':
    ip_instance = krisp_audio.IpFloat.create(ip_cfg)
elif sample_type == 'PCM_16':
    ip_instance = krisp_audio.IpInt16.create(ip_cfg)
else:
    raise ValueError(f"Unsupported sample type {self.audio_stream_info['sample_type']}")

ipRecommendedThreshold = 0.5

# Processing fixed sized audio frames
for frame in audioFrames:
    # Logic to call VAD and determine if VAD probability is True or False
    vad_prob = vad_API(frame)
    vad_flag = vad_prob >= 0.5

    ipProbability = ip_instance.process(frame, vad_flag)  # the value is in the [0, 1] range
    if ipProbability >= ipRecommendedThreshold:
        # Consider user intended to interrupt the bot
        pass  # placeholder: invoke the application's interruption handling here

# Free the Krisp SDK global instance
# NOTE(review): presumably pairs with an SDK global-init call made earlier in
# the application (not shown in this excerpt) - confirm.
ip_instance = None  # drop the IP instance before destroying the SDK globals
krisp_audio.globalDestroy()
Updated 16 days ago
