Twilio Video: iOS

Introduction

This page will guide you through configuring your Xcode project to:

  • Include Apple's Accelerate framework
  • Include the KrispAudio SDK xcframework
  • Include the Krisp model files in the Xcode project
  • Load Krisp models at runtime within the iOS app
  • Include KrispAudioDevice - an implementation of the TVIAudioDevice protocol - in the Xcode project

Requirements

  • Working knowledge of Xcode
  • Familiarity with the Twilio Video SDK for iOS
  • Understanding of the TVIAudioDevice protocol in the Twilio Video SDK for iOS
  • Krisp Audio SDK for iOS bundled as an xcframework

Integration Steps

1. Including Accelerate and KrispAudio SDK frameworks

In your Xcode iOS project settings:

  • Go to the TARGETS
  • Choose your target
  • Select the General tab
  • Find the Frameworks, Libraries, and Embedded Content section
  • Press the + icon and add the Accelerate.framework
  • Press the + icon and add the KrispAudioSDK.xcframework

We will assume that the TwilioVideo SDK is already included in the project.

2. Including the Krisp model file in the iOS application

  • In your Xcode project, create a folder named Models and store the Krisp model file(s) in it
  • Go to the TARGETS
  • Choose your target
  • Select the Build Phases tab
  • Expand the Copy Bundle Resources section
  • Press the + icon and add the model file to the list

The model file will now be available at the root of the app's main bundle at runtime, which is where KrispAudioDevice looks it up by file name.

3. Integrating Krisp with TVIAudioDevice

The KrispAudioDevice implements the TVIAudioDevice protocol enabling:

  • Audio capture from the device’s microphone
  • Processing of the audio frames with the Krisp Audio SDK noise-cancellation technology
  • Transmission of the modified audio data to the Twilio Video SDK

KrispAudioDevice header

#import <TwilioVideo/TwilioVideo.h>


/**
 * A TVIAudioDevice that captures microphone audio, optionally runs each
 * 10 ms frame through the Krisp Audio SDK noise canceller, and hands the
 * result to the Twilio Video SDK. Audio rendering (playback) is not
 * implemented by this device.
 */
@interface KrispAudioDevice : NSObject <TVIAudioDevice>

/**
 * Creates a capture device running at the given sample rate.
 *
 * @param sampleRate Sample rate in Hz. Must be one of the rates the Krisp
 *                   Audio SDK supports (8000, 16000, 24000, 32000, 44100,
 *                   48000, 88200 or 96000).
 * @return The device, or nil when the rate is unsupported or the output
 *         frame buffer cannot be allocated.
 */
- (instancetype)init:(int)sampleRate;

/**
 * Initializes the Krisp Audio SDK and loads a model from the main bundle.
 *
 * @param modelFileName Full file name (including extension) of a model
 *                      resource in the main bundle.
 * @return YES when the model was loaded; NO when a model is already loaded
 *         or any step fails (the reason is logged).
 */
- (BOOL)loadKrisp:(NSString *)modelFileName;

/// Turns noise cancellation on. It is on by default for a new device.
- (void)enableNoiseCanceling;

/// Turns noise cancellation off; captured frames are passed through unmodified.
- (void)disableNoiseCanceling;

/// Closes any open Krisp session, removes the loaded model and shuts the Krisp SDK down.
- (void)unloadKrisp;

@end

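KrispAudioDevice implementation (Objective-C++: the code below uses static_cast and the Krisp C++ headers, so compile it as a .mm file)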

#import "KrispAudioDevice.h"

#include "KrispAudioSDK/krisp-audio-sdk.hpp"
#include "KrispAudioSDK/krisp-audio-sdk-nc.hpp"


// Capture is mono.
static const int kChannels = 1;
// Duration of one processed frame; used to derive samplesPerFrame from the sample rate.
static const int kFrameDurationMs = 10;

// Private state of KrispAudioDevice.
@interface KrispAudioDevice ()

#pragma mark Capture configuration

// Sample rate in Hz given to -init:.
@property (nonatomic) int sampleRate;
// Number of samples in one kFrameDurationMs frame at sampleRate.
@property (nonatomic) int samplesPerFrame;

#pragma mark Capture pipeline

// Context received in -startCapturing:, passed back when writing capture data.
@property (nonatomic, assign) TVIAudioDeviceContext audioContext;
// RemoteIO audio unit created by -initializeCapturer.
@property (nonatomic, assign) AudioUnit remoteIOUnit;
// Flag checked by the capture callback before it renders audio.
@property (nonatomic, assign) BOOL isCapturing;
// Accumulates rendered samples until at least one full frame is available.
@property (nonatomic, strong) NSMutableData *captureBuffer;
// malloc'ed buffer of samplesPerFrame samples holding the processed frame.
@property (nonatomic) SInt16 *outputFrame;

#pragma mark Krisp state

// YES after -loadKrisp: succeeded and until -unloadKrisp.
@property (atomic, assign) BOOL krispLoaded;
// Noise-cancellation session opened in -startCapturing:.
@property (nonatomic) KrispAudioSessionID krispSession;
// Whether frames are run through Krisp or passed through unchanged.
@property (atomic) BOOL noiseCancelingEnabled;

@end


@implementation KrispAudioDevice

/**
 * Sets up a device for the given sample rate.
 *
 * Rejects rates that getKrispSamplingRate() does not map to a Krisp
 * sampling-rate value, sizes one 10 ms frame, allocates the output frame
 * buffer and enables noise cancellation by default.
 *
 * @param sampleRate Sample rate in Hz.
 * @return The initialized device, or nil on an unsupported rate or a
 *         failed allocation.
 */
- (instancetype)init: (int)sampleRate {
    self = [super init];
    if (self == nil) {
        return nil;
    }

    if (getKrispSamplingRate(sampleRate) == 0) {
        NSLog(@"[KrispAudioDevice] Krisp Audio SDK does not support %d sampling rate", sampleRate);
        return nil;
    }

    const int frameSampleCount = (int)(sampleRate * kFrameDurationMs / 1000);
    _sampleRate = sampleRate;
    _samplesPerFrame = frameSampleCount;

    // One frame of 16-bit samples; released in -dealloc.
    _outputFrame = (SInt16 *)malloc(frameSampleCount * sizeof(SInt16));
    if (_outputFrame == NULL) {
        NSLog(@"Failed to allocate memory for outputFrame");
        return nil;
    }

    _captureBuffer = [NSMutableData data];
    _noiseCancelingEnabled = YES;
    return self;
}

/**
 * Releases everything the device owns: stops an active capture, disposes
 * the RemoteIO unit, unloads Krisp, frees the output frame buffer and
 * deactivates the shared AVAudioSession.
 */
- (void)dealloc {
    if (_isCapturing) {
        [self stopCapturing];
    }
    // The branch above is skipped when capture was never started, so a unit
    // created by -initializeCapturer would otherwise be leaked here.
    if (_remoteIOUnit) {
        AudioUnitUninitialize(_remoteIOUnit);
        AudioComponentInstanceDispose(_remoteIOUnit);
        _remoteIOUnit = NULL;
    }
    if (_krispLoaded) {
        [self unloadKrisp];
    }
    if (_outputFrame) {
        free(_outputFrame);
        _outputFrame = NULL;
    }
    AVAudioSession *session = [AVAudioSession sharedInstance];
    NSError *error = nil;
    // Cocoa convention: the BOOL result, not the error pointer, signals failure.
    if (![session setActive:NO error:&error]) {
        NSLog(@"[KrispAudioDevice] Error deactivating AVAudioSession: %@", error);
    }
}

#pragma mark - public control methods

/// Turns Krisp noise cancellation on for subsequently captured frames.
- (void)enableNoiseCanceling {
    // Go through the atomic accessor: the flag is read via the property
    // when frames are processed, so the write should use the same path.
    self.noiseCancelingEnabled = YES;
}

/// Turns Krisp noise cancellation off; subsequently captured frames are passed through unchanged.
- (void)disableNoiseCanceling {
    // Go through the atomic accessor: the flag is read via the property
    // when frames are processed, so the write should use the same path.
    self.noiseCancelingEnabled = NO;
}

/**
 * Initializes the Krisp Audio SDK and registers the model stored in the
 * main bundle under the name "model32".
 *
 * The model file is located and read before the SDK is initialized, and the
 * SDK is shut down again if the model cannot be registered, so a failed call
 * leaves no global SDK state behind.
 *
 * @param modelFileName Full file name (including extension) of the model
 *                      resource in the main bundle.
 * @return YES on success; NO if a model is already loaded, the name is
 *         empty, the file is missing/unreadable/empty, or the SDK reports
 *         an error.
 */
- (BOOL)loadKrisp:(NSString *)modelFileName {
    if (self.krispLoaded) {
        NSLog(@"[KrispAudioDevice] A Krisp model is already loaded. Unload it before loading a new one.");
        return NO;
    }
    // A nil/empty name would make the bundle lookup below ambiguous.
    if (modelFileName.length == 0) {
        NSLog(@"[KrispAudioDevice] Model file name must not be empty");
        return NO;
    }

    NSString *modelFilePath = [[NSBundle mainBundle] pathForResource:modelFileName
                                                              ofType:nil];
    if (!modelFilePath) {
        NSLog(@"[KrispAudioDevice] Model file not found in bundle: %@", modelFileName);
        return NO;
    }
    NSData *modelData = [NSData dataWithContentsOfFile:modelFilePath];
    if (modelData.length == 0) {
        NSLog(@"[KrispAudioDevice] Failed to read model file: %@", modelFilePath);
        return NO;
    }

    // NOTE(review): 0 is passed through unchanged from the original code;
    // presumably it selects the SDK's default working path - confirm.
    int globalInitResult = krispAudioGlobalInit(0);
    if (globalInitResult) {
        NSLog(@"[KrispAudioDevice] Failed to initialize Krisp Audio SDK with code: %d", globalInitResult);
        return NO;
    }

    int setModelResult = krispAudioSetModelBlob(modelData.bytes,
                                                (unsigned int)modelData.length,
                                                "model32");
    if (setModelResult) {
        NSLog(@"[KrispAudioDevice] Failed to load Krisp model content with code: %d", setModelResult);
        // Undo the global init above; krispLoaded stays NO, so nothing else
        // would ever tear it down.
        int globalDestroyResult = krispAudioGlobalDestroy();
        if (globalDestroyResult != 0) {
            NSLog(@"[KrispAudioDevice] Failed to destroy Krisp global with code: %d", globalDestroyResult);
        }
        return NO;
    }

    self.krispLoaded = YES;
    return YES;
}

/**
 * Closes the open noise-cancellation session (if any), removes the
 * "model32" model and shuts the Krisp SDK down.
 *
 * Model removal and SDK shutdown are skipped when no model is loaded, so
 * calling this without a prior successful -loadKrisp: does not touch global
 * SDK state.
 *
 * NOTE(review): no synchronization with the capture callback is visible
 * here; callers should presumably stop capturing first - confirm.
 */
- (void) unloadKrisp {
    if (_krispSession) {
        int closeSessionResult = krispAudioNcCloseSession(_krispSession);
        if (closeSessionResult != 0) {
            NSLog(@"[KrispAudioDevice] Failed to close Krisp session with code: %d", closeSessionResult);
        }
        _krispSession = NULL;
    }

    if (!_krispLoaded) {
        // Nothing was loaded by this device; leave the SDK alone.
        return;
    }

    int removeModelResult = krispAudioRemoveModel("model32");
    if (removeModelResult != 0) {
        NSLog(@"[KrispAudioDevice] Failed to remove Krisp model with code: %d", removeModelResult);
    }
    int globalDestroyResult = krispAudioGlobalDestroy();
    if (globalDestroyResult != 0) {
        NSLog(@"[KrispAudioDevice] Failed to destroy Krisp global with code: %d", globalDestroyResult);
    }
    _krispLoaded = NO;
}

/**
 * Maps a sample rate in Hz to the matching KrispAudioSamplingRate value.
 *
 * @param sampleRate Sample rate in Hz.
 * @return The Krisp constant for 8000, 16000, 24000, 32000, 44100, 48000,
 *         88200 or 96000 Hz; a zero-valued KrispAudioSamplingRate for any
 *         other rate.
 */
static KrispAudioSamplingRate getKrispSamplingRate(int sampleRate) {
    static const struct {
        int hertz;
        KrispAudioSamplingRate krispRate;
    } kSupportedRates[] = {
        {  8000, KRISP_AUDIO_SAMPLING_RATE_8000HZ  },
        { 16000, KRISP_AUDIO_SAMPLING_RATE_16000HZ },
        { 24000, KRISP_AUDIO_SAMPLING_RATE_24000HZ },
        { 32000, KRISP_AUDIO_SAMPLING_RATE_32000HZ },
        { 44100, KRISP_AUDIO_SAMPLING_RATE_44100HZ },
        { 48000, KRISP_AUDIO_SAMPLING_RATE_48000HZ },
        { 88200, KRISP_AUDIO_SAMPLING_RATE_88200HZ },
        { 96000, KRISP_AUDIO_SAMPLING_RATE_96000HZ },
    };
    const size_t rateCount = sizeof(kSupportedRates) / sizeof(kSupportedRates[0]);

    for (size_t index = 0; index < rateCount; ++index) {
        if (kSupportedRates[index].hertz == sampleRate) {
            return kSupportedRates[index].krispRate;
        }
    }
    // Unsupported rate: callers compare the result against 0.
    return static_cast<KrispAudioSamplingRate>(0);
}

#pragma mark - TVIAudioDevice protocol methods

/// Rendering is not implemented: this device only captures audio, so
/// renderer initialization always reports failure.
- (BOOL)initializeRenderer {
    return NO;
}

/**
 * Describes the render format as mono audio at the device's sample rate,
 * one 10 ms frame per buffer.
 *
 * NOTE(review): the renderer methods of this device all return NO; check
 * whether a non-nil render format is still what Twilio expects in that case.
 */
- (nullable TVIAudioFormat *)renderFormat {
    const size_t channelCount = (size_t)kChannels;
    const uint32_t rateHz = (uint32_t)self.sampleRate;
    const size_t framesPerBuffer = (size_t)self.samplesPerFrame;

    TVIAudioFormat *format = [[TVIAudioFormat alloc] initWithChannels:channelCount
                                                           sampleRate:rateHz
                                                      framesPerBuffer:framesPerBuffer];
    return format;
}

/// Rendering is not implemented; the context is ignored and NO is returned.
- (BOOL)startRendering:(nonnull TVIAudioDeviceContext)context {
    return NO;
}

/// Rendering is not implemented, so there is nothing to stop; always returns NO.
- (BOOL)stopRendering {
    return NO;
}

/// Describes the captured audio: mono, at the device's sample rate, with
/// one 10 ms frame (samplesPerFrame samples) per buffer.
- (nullable TVIAudioFormat *)captureFormat {
    const size_t channelCount = (size_t)kChannels;
    const uint32_t rateHz = (uint32_t)self.sampleRate;
    const size_t framesPerBuffer = (size_t)self.samplesPerFrame;

    TVIAudioFormat *format = [[TVIAudioFormat alloc] initWithChannels:channelCount
                                                           sampleRate:rateHz
                                                      framesPerBuffer:framesPerBuffer];
    return format;
}

/**
 * Prepares microphone capture: configures and activates the shared
 * AVAudioSession (play-and-record, Bluetooth allowed), then creates,
 * configures and initializes a RemoteIO audio unit whose input callback is
 * CaptureCallback.
 *
 * A unit left over from an earlier call is disposed first, and a unit that
 * fails configuration is disposed before returning, so no audio unit is
 * leaked on any path.
 *
 * @return YES when the unit is ready to be started; NO on any failure (the
 *         reason is logged and remoteIOUnit is left NULL).
 */
- (BOOL)initializeCapturer {
    // Release a unit from a previous initialization instead of overwriting
    // (and leaking) it below.
    AudioUnit staleUnit = self.remoteIOUnit;
    if (staleUnit) {
        AudioOutputUnitStop(staleUnit);
        AudioUnitUninitialize(staleUnit);
        AudioComponentInstanceDispose(staleUnit);
        self.remoteIOUnit = NULL;
    }

    AVAudioSession *session = [AVAudioSession sharedInstance];
    NSError *error = nil;

    // Cocoa convention: the BOOL result, not the error pointer, signals failure.
    if (![session setCategory:AVAudioSessionCategoryPlayAndRecord
                  withOptions:AVAudioSessionCategoryOptionAllowBluetooth
                        error:&error]) {
        NSLog(@"[KrispAudioDevice] Error setting AVAudioSession category: %@", error);
        return NO;
    }

    if (![session setActive:YES error:&error]) {
        NSLog(@"[KrispAudioDevice] Error activating AVAudioSession: %@", error);
        return NO;
    }

    AudioComponentDescription ioComponentDescription;
    ioComponentDescription.componentType = kAudioUnitType_Output;
    ioComponentDescription.componentSubType = kAudioUnitSubType_RemoteIO;
    ioComponentDescription.componentManufacturer = kAudioUnitManufacturer_Apple;
    ioComponentDescription.componentFlags = 0;
    ioComponentDescription.componentFlagsMask = 0;

    AudioComponent component = AudioComponentFindNext(NULL, &ioComponentDescription);
    if (component == NULL) {
        NSLog(@"[KrispAudioDevice] Could not find RemoteIO Audio Component");
        return NO;
    }

    // Build the unit in a local so the property only ever holds a fully
    // configured, initialized unit.
    AudioUnit unit = NULL;
    OSStatus status = AudioComponentInstanceNew(component, &unit);
    if (status != noErr || unit == NULL) {
        NSLog(@"[KrispAudioDevice] Could not create RemoteIO Audio Unit");
        return NO;
    }

    if (![self configureRemoteIOUnitForCapture:unit]) {
        AudioComponentInstanceDispose(unit);
        return NO;
    }

    self.remoteIOUnit = unit;
    return YES;
}

// Enables input on bus 1 of the given RemoteIO unit, sets its 16-bit mono PCM
// stream format, installs CaptureCallback and initializes the unit.
// Returns NO (after logging) at the first step that fails; the caller owns
// and disposes the unit.
- (BOOL)configureRemoteIOUnitForCapture:(AudioUnit)unit {
    UInt32 enableInput = 1;
    OSStatus status = AudioUnitSetProperty(unit,
                                           kAudioOutputUnitProperty_EnableIO,
                                           kAudioUnitScope_Input,
                                           1,
                                           &enableInput,
                                           sizeof(enableInput));
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] Could not enable input on RemoteIO Unit");
        return NO;
    }

    AudioStreamBasicDescription audioFormat;
    memset(&audioFormat, 0, sizeof(audioFormat));
    audioFormat.mSampleRate       = self.sampleRate;
    audioFormat.mFormatID         = kAudioFormatLinearPCM;
    audioFormat.mFormatFlags      = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
    audioFormat.mChannelsPerFrame = kChannels;
    audioFormat.mBitsPerChannel   = 16;
    audioFormat.mBytesPerFrame    = audioFormat.mChannelsPerFrame * sizeof(SInt16);
    audioFormat.mFramesPerPacket  = 1;
    audioFormat.mBytesPerPacket   = audioFormat.mBytesPerFrame * audioFormat.mFramesPerPacket;

    status = AudioUnitSetProperty(unit,
                                  kAudioUnitProperty_StreamFormat,
                                  kAudioUnitScope_Output,  // Note: for capturing, set at scope 'Output'
                                  1,                        // bus 1 for input
                                  &audioFormat,
                                  sizeof(audioFormat));
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] Could not set stream format on RemoteIO");
        return NO;
    }

    AURenderCallbackStruct captureCallback;
    captureCallback.inputProc = CaptureCallback;
    // Unretained reference: the unit must be disposed before self goes away.
    captureCallback.inputProcRefCon = (__bridge void *)(self);

    status = AudioUnitSetProperty(unit,
                                  kAudioOutputUnitProperty_SetInputCallback,
                                  kAudioUnitScope_Global,
                                  1,
                                  &captureCallback,
                                  sizeof(captureCallback));
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] Could not set capture callback");
        return NO;
    }

    status = AudioUnitInitialize(unit);
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] Could not initialize RemoteIO Audio Unit");
        return NO;
    }

    return YES;
}

/**
 * Starts microphone capture: opens a fresh Krisp noise-cancellation session
 * for the device's sample rate and starts the RemoteIO unit.
 *
 * @param context Twilio device context; stored and later passed to
 *                TVIAudioDeviceWriteCaptureData by the capture callback.
 * @return YES when the unit was started; NO when the unit has not been
 *         initialized, the Krisp session cannot be created, or the unit
 *         fails to start.
 */
- (BOOL)startCapturing:(nonnull TVIAudioDeviceContext)context {
    // Check the precondition first so no Krisp session is created when
    // capture cannot start anyway.
    AudioUnit unit = self.remoteIOUnit;
    if (!unit) {
        NSLog(@"[KrispAudioDevice] RemoteIO Audio Unit is not initialized!");
        return NO;
    }

    self.audioContext = context;
    // Set before the fallible steps and left set if they fail: -dealloc only
    // calls -stopCapturing (which closes the session) while this flag is set.
    self.isCapturing = YES;

    // Drop partial-frame bytes left over from a previous capture run.
    [self.captureBuffer setLength:0];

    if (_krispSession) {
        krispAudioNcCloseSession(_krispSession);
        _krispSession = NULL;
    }
    // Use the same Hz -> Krisp enum mapping that -init: validated against,
    // rather than casting the raw rate.
    const KrispAudioSamplingRate krispRate = getKrispSamplingRate(_sampleRate);
    _krispSession = krispAudioNcCreateSession(krispRate,
                                              krispRate,
                                              KRISP_AUDIO_FRAME_DURATION_10MS,
                                              "model32");
    if (!_krispSession) {
        NSLog(@"[KrispAudioDevice] failed calling krispAudioNcCreateSession:");
        return NO;
    }

    OSStatus status = AudioOutputUnitStart(unit);
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] Could not start capturing, status: %d", (int)status);
        return NO;
    }
    return YES;
}

/**
 * Stops capture and releases the capture resources: the RemoteIO unit is
 * stopped, uninitialized and disposed, and the Krisp session is closed.
 *
 * The unit is released whenever it exists, not only while capturing, so a
 * unit that was initialized but never started is not leaked.
 *
 * @return Always YES.
 */
- (BOOL)stopCapturing {
    AudioUnit unit = self.remoteIOUnit;
    if (unit) {
        AudioOutputUnitStop(unit);
        AudioUnitUninitialize(unit);
        AudioComponentInstanceDispose(unit);
        self.remoteIOUnit = NULL;
    }
    self.isCapturing = NO;
    if (_krispSession) {
        krispAudioNcCloseSession(_krispSession);
        _krispSession = NULL;
    }
    return YES;
}

#pragma mark - process 10ms sized frame

/**
 * Produces one output frame in outputFrame from one input frame.
 *
 * The input is copied through unchanged when noise cancellation is
 * disabled, when no Krisp session is open, or when the Krisp call fails;
 * otherwise outputFrame receives the noise-cancelled samples.
 *
 * @param inputFrame samplesPerFrame 16-bit samples.
 */
- (void)processFrameWithKrisp:(const SInt16 *)inputFrame {
    const size_t frameBytes = _samplesPerFrame * sizeof(SInt16);

    // Never hand a NULL session to the SDK; fall back to pass-through.
    if (!self.noiseCancelingEnabled || !_krispSession) {
        memcpy(_outputFrame, inputFrame, frameBytes);
        return;
    }

    int frameNcResult = krispAudioNcCleanAmbientNoiseInt16(
                            _krispSession,
                            inputFrame, _samplesPerFrame,
                            _outputFrame, _samplesPerFrame);
    if (frameNcResult != 0) {
        NSLog(@"[KrispAudioDevice] krispAudioNcCleanAmbientNoiseInt16 failed with code: %d", frameNcResult);
        memcpy(_outputFrame, inputFrame, frameBytes);
    }
}

#pragma mark - Audio Capture Callback

/**
 * RemoteIO input callback. Renders the newly captured samples, appends them
 * to the device's capture buffer and, for every complete frame
 * (samplesPerFrame samples) in that buffer, runs the frame through
 * -processFrameWithKrisp: and writes the result to Twilio.
 *
 * @param refCon The KrispAudioDevice (unretained) registered with the unit.
 * @return noErr, or the AudioUnitRender error status.
 */
static OSStatus CaptureCallback(void *refCon,
                                AudioUnitRenderActionFlags *actionFlags,
                                const AudioTimeStamp *timestamp,
                                UInt32 busNumber,
                                UInt32 numFrames,
                                AudioBufferList *bufferList) {
    KrispAudioDevice *device = (__bridge KrispAudioDevice *)refCon;
    if (!device.isCapturing || !device.remoteIOUnit) {
        return noErr;
    }

    // mData is NULL so the audio unit supplies its own buffer for the render.
    AudioBufferList localBufferList;
    localBufferList.mNumberBuffers = 1;
    localBufferList.mBuffers[0].mNumberChannels = kChannels;
    localBufferList.mBuffers[0].mDataByteSize = numFrames * sizeof(SInt16);
    localBufferList.mBuffers[0].mData = NULL;

    OSStatus status = AudioUnitRender(device.remoteIOUnit, actionFlags, timestamp,
                                      busNumber, numFrames, &localBufferList);
    if (status != noErr) {
        NSLog(@"[KrispAudioDevice] AudioUnitRender failed with status: %d", (int)status);
        return status;
    }

    // Trust what was actually rendered rather than the requested size, and
    // never append from a NULL buffer.
    const void *renderedBytes = localBufferList.mBuffers[0].mData;
    const UInt32 renderedByteCount = localBufferList.mBuffers[0].mDataByteSize;
    if (renderedBytes == NULL || renderedByteCount == 0) {
        return noErr;
    }

    NSMutableData *pending = device.captureBuffer;
    [pending appendBytes:renderedBytes length:renderedByteCount];

    const NSUInteger frameBytes = (NSUInteger)device.samplesPerFrame * sizeof(SInt16);
    const NSRange headFrame = NSMakeRange(0, frameBytes);
    while ([pending length] >= frameBytes) {
        // The frame is copied/processed into outputFrame before it is
        // removed from the front of the buffer.
        [device processFrameWithKrisp:(const SInt16 *)[pending bytes]];
        [pending replaceBytesInRange:headFrame withBytes:NULL length:0];
        TVIAudioDeviceWriteCaptureData(device.audioContext,
                                       (int8_t *)device.outputFrame,
                                       frameBytes);
    }

    return noErr;
}

@end