Merge pull request #157 from anam-org/feat/non-blocking-mic

ao-anam · web-flow · commit 4558ec8ca67e · 2025-10-15T11:06:05.000+01:00
feat: request mic permissions async
diff --git a/src/AnamClient.ts b/src/AnamClient.ts
@@ -19,6 +19,7 @@ import {
 import {
   AnamClientOptions,
   AnamEvent,
+  AudioPermissionState,
   ConnectionClosedCode,
   EventCallbacks,
   InputAudioState,
@@ -35,7 +36,10 @@ export default class AnamClient {
 
   private personaConfig: PersonaConfig | undefined;
   private clientOptions: AnamClientOptions | undefined;
-  private inputAudioState: InputAudioState = { isMuted: false };
+  private inputAudioState: InputAudioState = {
+    isMuted: false,
+    permissionState: AudioPermissionState.NOT_REQUESTED,
+  };
 
   private sessionId: string | null = null;
   private organizationId: string | null = null;
diff --git a/src/modules/StreamingClient.ts b/src/modules/StreamingClient.ts
@@ -7,6 +7,7 @@ import {
 import {
   AnamEvent,
   InputAudioState,
+  AudioPermissionState,
   InternalEvent,
   SignalMessage,
   SignalMessageAction,
@@ -40,7 +41,10 @@ export class StreamingClient {
   private videoElement: HTMLVideoElement | null = null;
   private videoStream: MediaStream | null = null;
   private audioStream: MediaStream | null = null;
-  private inputAudioState: InputAudioState = { isMuted: false };
+  private inputAudioState: InputAudioState = {
+    isMuted: false,
+    permissionState: AudioPermissionState.NOT_REQUESTED,
+  };
   private audioDeviceId: string | undefined;
   private disableInputAudio: boolean;
   private successMetricPoller: ReturnType<typeof setInterval> | null = null;
@@ -307,7 +311,7 @@ export class StreamingClient {
       // start the connection
       this.signallingClient.connect();
     } catch (error) {
-      console.log('StreamingClient - startConnection: error', error);
+      console.error('StreamingClient - startConnection: error', error);
       this.handleWebrtcFailure(error);
     }
   }
@@ -363,6 +367,18 @@ export class StreamingClient {
       this.peerConnection.addTransceiver('audio', { direction: 'recvonly' });
     } else {
       this.peerConnection.addTransceiver('audio', { direction: 'sendrecv' });
+
+      // Handle audio setup after transceivers are configured
+      if (this.inputAudioStream) {
+        // User provided an audio stream, set it up immediately
+        await this.setupAudioTrack();
+      } else {
+        // No user stream, start microphone permission request asynchronously
+        // Don't await - let it run in parallel with connection setup
+        this.requestMicrophonePermissionAsync().catch((error) => {
+          console.error('Async microphone permission request failed:', error);
+        });
+      }
     }
   }
 
@@ -392,7 +408,6 @@ export class StreamingClient {
         break;
       case SignalMessageAction.END_SESSION:
         const reason = signalMessage.payload as string;
-        console.log('StreamingClient - onSignalMessage: reason', reason);
         this.publicEventEmitter.emit(
           AnamEvent.CONNECTION_CLOSED,
           ConnectionClosedCode.SERVER_CLOSED_CONNECTION,
@@ -548,48 +563,20 @@ export class StreamingClient {
       );
       return;
     }
+
     /**
-     * Audio
+     * Audio - Validate user-provided stream only
      *
-     * If the user hasn't provided an audio stream, capture the audio stream from the user's microphone and send it to the peer connection
-     * If input audio is disabled we don't send any audio to the peer connection
+     * If the user provided an audio stream, validate it has audio tracks
+     * Microphone permission request will be handled asynchronously
      */
-    if (!this.disableInputAudio) {
-      if (this.inputAudioStream) {
-        // verify the user provided stream has audio tracks
-        if (!this.inputAudioStream.getAudioTracks().length) {
-          throw new Error(
-            'StreamingClient - setupDataChannels: user provided stream does not have audio tracks',
-          );
-        }
-      } else {
-        const audioConstraints: MediaTrackConstraints = {
-          echoCancellation: true,
-        };
-
-        // If an audio device ID is provided in the options, use it
-        if (this.audioDeviceId) {
-          audioConstraints.deviceId = {
-            exact: this.audioDeviceId,
-          };
-        }
-
-        this.inputAudioStream = await navigator.mediaDevices.getUserMedia({
-          audio: audioConstraints,
-        });
-      }
-
-      // mute the audio tracks if the user has muted the microphone
-      if (this.inputAudioState.isMuted) {
-        this.muteAllAudioTracks();
+    if (!this.disableInputAudio && this.inputAudioStream) {
+      // verify the user provided stream has audio tracks
+      if (!this.inputAudioStream.getAudioTracks().length) {
+        throw new Error(
+          'StreamingClient - setupDataChannels: user provided stream does not have audio tracks',
+        );
       }
-      const audioTrack = this.inputAudioStream.getAudioTracks()[0];
-      this.peerConnection.addTrack(audioTrack, this.inputAudioStream);
-      // pass the stream to the callback if it exists
-      this.publicEventEmitter.emit(
-        AnamEvent.INPUT_AUDIO_STREAM_STARTED,
-        this.inputAudioStream,
-      );
     }
 
     /**
@@ -615,6 +602,114 @@ export class StreamingClient {
     };
   }
 
+  /**
+   * Ask for microphone access via getUserMedia without blocking connection setup; emits MIC_PERMISSION_PENDING, then MIC_PERMISSION_GRANTED or MIC_PERMISSION_DENIED.
+   */
+  private async requestMicrophonePermissionAsync() {
+    if (this.inputAudioState.permissionState === AudioPermissionState.PENDING) {
+      return; // A request is already in flight; do not start a second one
+    }
+
+    this.inputAudioState = {
+      ...this.inputAudioState,
+      permissionState: AudioPermissionState.PENDING,
+    };
+
+    this.publicEventEmitter.emit(AnamEvent.MIC_PERMISSION_PENDING);
+
+    try {
+      const audioConstraints: MediaTrackConstraints = {
+        echoCancellation: true,
+      };
+
+      // When an audio device ID was supplied, require exactly that device for the capture
+      if (this.audioDeviceId) {
+        audioConstraints.deviceId = {
+          exact: this.audioDeviceId,
+        };
+      }
+
+      this.inputAudioStream = await navigator.mediaDevices.getUserMedia({
+        audio: audioConstraints,
+      });
+
+      this.inputAudioState = {
+        ...this.inputAudioState,
+        permissionState: AudioPermissionState.GRANTED,
+      };
+
+      this.publicEventEmitter.emit(AnamEvent.MIC_PERMISSION_GRANTED);
+
+      // Attach the track to the existing connection. NOTE(review): a throw from setupAudioTrack() lands in the catch below and is reported as DENIED after GRANTED was already emitted; likewise any getUserMedia failure (not only a user refusal) is reported as DENIED
+      await this.setupAudioTrack();
+    } catch (error) {
+      console.error('Failed to get microphone permission:', error);
+      this.inputAudioState = {
+        ...this.inputAudioState,
+        permissionState: AudioPermissionState.DENIED,
+      };
+
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      this.publicEventEmitter.emit(
+        AnamEvent.MIC_PERMISSION_DENIED,
+        errorMessage,
+      );
+    }
+  }
+
+  /**
+   * Attach the first audio track of inputAudioStream to the peer connection, preferring replaceTrack on an existing audio sender, then emit INPUT_AUDIO_STREAM_STARTED.
+   */
+  private async setupAudioTrack() {
+    if (!this.peerConnection || !this.inputAudioStream) {
+      return; // Nothing to attach; NOTE(review): a stream captured while the connection went away is not stopped here — confirm cleanup happens elsewhere
+    }
+
+    // A stream without audio tracks is logged and ignored here rather than thrown
+    if (!this.inputAudioStream.getAudioTracks().length) {
+      console.error(
+        'StreamingClient - setupAudioTrack: stream does not have audio tracks',
+      );
+      return;
+    }
+
+    // Honour an existing mute before the track is attached (muteAllAudioTracks is defined outside this hunk)
+    if (this.inputAudioState.isMuted) {
+      this.muteAllAudioTracks();
+    }
+
+    const audioTrack = this.inputAudioStream.getAudioTracks()[0];
+
+    // Look for a sender that already carries audio, or a track-less sender that appears to be the audio one
+    const existingSenders = this.peerConnection.getSenders();
+    const audioSender = existingSenders.find(
+      (sender) =>
+        sender.track?.kind === 'audio' ||
+        (sender.track === null && sender.dtmf !== null), // NOTE(review): dtmf is used as a proxy for "audio sender without a track" — confirm this holds in every target browser
+    );
+
+    if (audioSender) {
+      // Swap our track into the existing sender (its current track may be null)
+      try {
+        await audioSender.replaceTrack(audioTrack);
+      } catch (error) {
+        console.error('Failed to replace audio track:', error);
+        // Fallback: add the track as a new sender. NOTE(review): addTrack on a live connection normally needs renegotiation — confirm that is handled
+        this.peerConnection.addTrack(audioTrack, this.inputAudioStream);
+      }
+    } else {
+      // No audio sender found, so add the track as a new sender (same renegotiation caveat — TODO confirm)
+      this.peerConnection.addTrack(audioTrack, this.inputAudioStream);
+    }
+
+    // Hand the stream to INPUT_AUDIO_STREAM_STARTED listeners
+    this.publicEventEmitter.emit(
+      AnamEvent.INPUT_AUDIO_STREAM_STARTED,
+      this.inputAudioStream,
+    );
+  }
+
   private async initPeerConnectionAndSendOffer() {
     await this.initPeerConnection();
 
diff --git a/src/types/InputAudioState.ts b/src/types/InputAudioState.ts
@@ -1,3 +1,11 @@
+export enum AudioPermissionState { // Microphone permission status stored in InputAudioState.permissionState
+  PENDING = 'pending', // set just before getUserMedia is called
+  GRANTED = 'granted', // set once getUserMedia resolves with a stream
+  DENIED = 'denied', // set when the microphone request fails, whatever the cause
+  NOT_REQUESTED = 'not_requested', // initial value in AnamClient and StreamingClient
+}
+
 export interface InputAudioState {
   isMuted: boolean;
+  permissionState: AudioPermissionState;
 }
diff --git a/src/types/events/public/AnamEvent.ts b/src/types/events/public/AnamEvent.ts
@@ -10,4 +10,7 @@ export enum AnamEvent {
   TALK_STREAM_INTERRUPTED = 'TALK_STREAM_INTERRUPTED',
   SESSION_READY = 'SESSION_READY',
   SERVER_WARNING = 'SERVER_WARNING',
+  MIC_PERMISSION_PENDING = 'MIC_PERMISSION_PENDING',
+  MIC_PERMISSION_GRANTED = 'MIC_PERMISSION_GRANTED',
+  MIC_PERMISSION_DENIED = 'MIC_PERMISSION_DENIED',
 }
diff --git a/src/types/events/public/EventCallbacks.ts b/src/types/events/public/EventCallbacks.ts
@@ -18,4 +18,7 @@ export type EventCallbacks = {
   [AnamEvent.TALK_STREAM_INTERRUPTED]: (correlationId: string) => void;
   [AnamEvent.SESSION_READY]: (sessionId: string) => void;
   [AnamEvent.SERVER_WARNING]: (message: string) => void;
+  [AnamEvent.MIC_PERMISSION_PENDING]: () => void;
+  [AnamEvent.MIC_PERMISSION_GRANTED]: () => void;
+  [AnamEvent.MIC_PERMISSION_DENIED]: (error: string) => void;
 };
diff --git a/src/types/index.ts b/src/types/index.ts
@@ -5,6 +5,7 @@ export type * from './streaming';
 export type * from './coreApi';
 export type { PersonaConfig } from './PersonaConfig';
 export type { InputAudioState } from './InputAudioState';
+export { AudioPermissionState } from './InputAudioState';
 export type * from './messageHistory';
 export { MessageRole } from './messageHistory'; // need to export this explicitly to avoid enum import issues
 export type * from './events';

Original file line number	Diff line number	Diff line change
`@@ -10,4 +10,7 @@ export enum AnamEvent {`
`10`	`10`	`TALK_STREAM_INTERRUPTED = 'TALK_STREAM_INTERRUPTED',`
`11`	`11`	`SESSION_READY = 'SESSION_READY',`
`12`	`12`	`SERVER_WARNING = 'SERVER_WARNING',`
	`13`	`+ MIC_PERMISSION_PENDING = 'MIC_PERMISSION_PENDING',`
	`14`	`+ MIC_PERMISSION_GRANTED = 'MIC_PERMISSION_GRANTED',`
	`15`	`+ MIC_PERMISSION_DENIED = 'MIC_PERMISSION_DENIED',`
`13`	`16`	`}`