Skip to content
This repository has been archived by the owner on Feb 5, 2025. It is now read-only.

Commit

Permalink
feat: add audio interface support (COR-000) (#178)
Browse files Browse the repository at this point in the history
  • Loading branch information
z4o4z authored Sep 24, 2024
1 parent cba9e3e commit f1954c1
Show file tree
Hide file tree
Showing 32 changed files with 540 additions and 54 deletions.
2 changes: 2 additions & 0 deletions packages/react-chat/.storybook/preview.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import 'regenerator-runtime/runtime';

import type { Preview } from '@storybook/react';
import React from 'react';

Expand Down
1 change: 1 addition & 0 deletions packages/react-chat/e2e/extensions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ test('render response extension from incoming trace', async ({ page }) => {
type: 'submit',
payload: { name: 'Alex', hair: 'curly' },
},
config: { tts: false },
});

return route.fulfill({ json: { trace: [] } });
Expand Down
7 changes: 5 additions & 2 deletions packages/react-chat/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@
"test:unit": "yarn g:vitest run --coverage"
},
"dependencies": {
"@voiceflow/base-types": "2.111.5",
"@voiceflow/base-types": "2.113.1",
"@voiceflow/dtos-interact": "1.1.0",
"@voiceflow/sdk-runtime": "workspace:*",
"@voiceflow/slate-serializer": "1.5.5",
"@voiceflow/stitches-react": "2.3.1",
"@voiceflow/voiceflow-types": "3.31.6",
"@voiceflow/voiceflow-types": "3.32.1",
"bowser": "2.11.0",
"chroma-js": "2.4.2",
"clsx": "1.2.1",
Expand All @@ -57,7 +57,9 @@
"react": "18.2.0",
"react-dom": "18.2.0",
"react-markdown": "9.0.0",
"react-speech-recognition": "3.10.0",
"react-textarea-autosize": "8.5.3",
"regenerator-runtime": "0.13.11",
"rehype-raw": "7.0.0",
"rehype-sanitize": "6.0.0",
"remark-gfm": "4.0.0",
Expand Down Expand Up @@ -89,6 +91,7 @@
"@types/node": "20.12.7",
"@types/react": "18.2.8",
"@types/react-dom": "18.2.4",
"@types/react-speech-recognition": "^3.9.5",
"@vitejs/plugin-react": "4.2.1",
"@voiceflow/test-common": "1.10.3",
"chromatic": "11.2.0",
Expand Down
4 changes: 4 additions & 0 deletions packages/react-chat/src/assets/svg/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
// Central registry of SVG icons, re-exported as React components via the
// `?react` import suffix (SVGR-style transform). Icon consumers look up
// icons by these export names (e.g. <Icon svg="microphone" />).
export { default as close } from './close.svg?react';
export { default as closeV2 } from './closeV2.svg?react';
export { default as largeArrowLeft } from './large-arrow-left.svg?react';
export { default as microphone } from './microphone.svg?react';
export { default as minus } from './minus.svg?react';
export { default as smallArrowUp } from './small-arrow-up.svg?react';
export { default as sound } from './sound.svg?react';
export { default as soundOff } from './sound-off.svg?react';
export { default as stop } from './stop.svg?react';
export { default as thumbsUp } from './thumbs-up.svg?react';
export { default as topCaret } from './top-caret.svg?react';
8 changes: 8 additions & 0 deletions packages/react-chat/src/assets/svg/microphone.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions packages/react-chat/src/assets/svg/sound-off.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 9 additions & 0 deletions packages/react-chat/src/assets/svg/sound.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions packages/react-chat/src/assets/svg/stop.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
29 changes: 22 additions & 7 deletions packages/react-chat/src/components/Chat/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ export interface ChatProps extends HeaderProps, AssistantInfoProps, FooterProps,
*/
isLoading: boolean;

/**
* If true, shows audio interface controls.
*/
audioInterface?: boolean;

/**
* A unix timestamp indicating the start of the conversation.
*/
Expand Down Expand Up @@ -62,12 +67,13 @@ const Chat: React.FC<ChatProps> = ({
onStart,
onSend,
children,
audioInterface,
}) => {
const timestamp = useTimestamp(startTime);
const dialogRef = useRef<HTMLElement>(null);
const [hasAlert, setAlert] = useState(false);

const { config } = useContext(RuntimeStateAPIContext);
const { config, toggleAudioOutput } = useContext(RuntimeStateAPIContext);
const state = useContext(RuntimeStateContext);

const handleClose = (event: React.MouseEvent<HTMLButtonElement>): void => {
Expand All @@ -81,14 +87,21 @@ const Chat: React.FC<ChatProps> = ({
const handleResume = (): void => setAlert(false);

const actions = useMemo<HeaderActionProps[]>(() => {
const items: HeaderActionProps[] = [{ svg: 'close', onClick: handleClose }];

if (config.render?.mode === RenderMode.OVERLAY) {
return [
{ svg: 'minus', onClick: onMinimize },
{ svg: 'close', onClick: handleClose },
];
items.unshift({ svg: 'minus', onClick: onMinimize });
}
return [{ svg: 'close', onClick: handleClose }];
}, [config.render, handleClose, onMinimize]);

if (audioInterface) {
items.unshift({
svg: state.audioOutput ? 'sound' : 'soundOff',
onClick: toggleAudioOutput,
});
}

return items;
}, [config.render, handleClose, onMinimize, state.audioOutput, audioInterface]);

if (isLoading) {
return (
Expand Down Expand Up @@ -116,6 +129,8 @@ const Chat: React.FC<ChatProps> = ({
onStart={onStart}
onSend={onSend}
disableSend={state.indicator}
audioInterface={audioInterface}
speechRecognition={config.speechRecognition}
/>
<Overlay />
<Prompt
Expand Down
16 changes: 16 additions & 0 deletions packages/react-chat/src/components/ChatInput/AudioInputButton.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import Icon from '../Icon';
import { AutoInputButtonContainer } from './styled';

interface AudioInputButtonProps {
  /** Invoked when the user clicks while recognition is active. */
  onStop: () => void;
  /** Invoked when the user clicks while recognition is idle. */
  onStart: () => void;
  /** Whether speech recognition is currently capturing audio. */
  listening: boolean;
}

/**
 * Microphone toggle for the chat input: renders a `stop` icon and stops
 * recognition when listening, otherwise renders a `microphone` icon and
 * starts it. Purely presentational — all state lives in the caller.
 */
export const AudioInputButton: React.FC<AudioInputButtonProps> = ({ onStop, onStart, listening }) => {
  const handleClick = listening ? onStop : onStart;
  const icon = listening ? 'stop' : 'microphone';

  return (
    <AutoInputButtonContainer listening={listening} onClick={handleClick}>
      <Icon svg={icon} />
    </AutoInputButtonContainer>
  );
};
105 changes: 105 additions & 0 deletions packages/react-chat/src/components/ChatInput/hooks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { useEffect, useLayoutEffect, useRef, useState } from 'react';
import ReactSpeechRecognition, { useSpeechRecognition as useReactSpeechRecognition } from 'react-speech-recognition';

import type { ChatSpeechRecognitionConfig, ChatSpeechRecognitionState } from '@/dtos/ChatConfig.dto';

/**
 * Unified speech-recognition hook for the chat input.
 *
 * Backed by either the browser's native recognition (via
 * `react-speech-recognition`) or a caller-supplied custom implementation
 * (`ChatSpeechRecognitionConfig`). The custom implementation is used when it
 * is provided AND (it sets `overrideNative`, or the browser lacks native
 * support).
 *
 * @param onSend - fired once when a recognition session ends (native:
 *   `listening` goes true→false; custom: `processing` goes true→false).
 * @param onValueChange - receives the live transcript while the native
 *   recognizer is listening, so the textarea mirrors speech.
 * @param customSpeechRecognition - optional custom recognition backend.
 * @returns listening/processing/availability flags plus start/stop controls
 *   and a `textareaRef` that is re-focused after a session ends.
 */
export const useSpeechRecognition = ({
  onSend,
  onValueChange,
  customSpeechRecognition,
}: {
  onSend?: () => void;
  onValueChange: (value: string) => void;
  customSpeechRecognition?: ChatSpeechRecognitionConfig;
}) => {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  // clearTranscriptOnListen: each new native session starts with an empty transcript.
  const reactSpeechRecognition = useReactSpeechRecognition({ clearTranscriptOnListen: true });

  // Prefer the custom backend when it explicitly overrides native recognition
  // or when the browser cannot do native recognition at all.
  const customSpeechRecognitionEnabled =
    !!customSpeechRecognition &&
    (customSpeechRecognition.overrideNative || !reactSpeechRecognition.browserSupportsSpeechRecognition);

  // Previous-value refs used to detect the falling edge (true -> false) of
  // listening/processing in the effect below; refs avoid re-running effects.
  const prevListening = useRef(
    customSpeechRecognitionEnabled ? customSpeechRecognition.initialState.listening : reactSpeechRecognition.listening
  );
  const prevProcessing = useRef(
    customSpeechRecognitionEnabled ? customSpeechRecognition.initialState.processing : false
  );
  // Keep the latest onSend reachable from effects without listing it as a
  // dependency (prevents effect re-runs when the callback identity changes).
  const onSendPersisted = useRef(onSend);
  onSendPersisted.current = onSend;

  // Mirror of the custom backend's state; seeded from native values when the
  // custom backend is disabled (then only used as an inert initial snapshot).
  const [customSpeechRecognitionState, setCustomSpeechRecognitionState] = useState<ChatSpeechRecognitionState>(
    customSpeechRecognitionEnabled
      ? customSpeechRecognition.initialState
      : {
          listening: reactSpeechRecognition.listening,
          transcript: reactSpeechRecognition.transcript,
          processing: false,
          microphoneAvailable: reactSpeechRecognition.isMicrophoneAvailable,
        }
  );

  // Start a session on the active backend; transcript is reset first so the
  // new session never appends onto a previous one.
  const onStartListening = (): void => {
    if (customSpeechRecognitionEnabled) {
      customSpeechRecognition.resetTranscript();
      customSpeechRecognition.startListening();
    } else {
      reactSpeechRecognition.resetTranscript();
      // continuous: keep capturing until the user explicitly stops.
      ReactSpeechRecognition.startListening({ continuous: true });
    }
  };

  // Stop the active backend's session.
  const onStopListening = (): void => {
    if (customSpeechRecognitionEnabled) {
      customSpeechRecognition.stopListening();
    } else {
      ReactSpeechRecognition.stopListening();
    }
  };

  // While the NATIVE recognizer is listening, stream the transcript into the
  // input. Layout effect so the textarea updates before paint.
  // NOTE(review): onValueChange is intentionally omitted from the deps —
  // assumed stable or latest-call-wins; confirm with callers.
  useLayoutEffect(() => {
    if (customSpeechRecognitionEnabled || !reactSpeechRecognition.listening) return;

    onValueChange(reactSpeechRecognition.transcript);
  }, [customSpeechRecognitionEnabled, reactSpeechRecognition.transcript]);

  // Detect end-of-session (falling edge of processing for the custom backend,
  // of listening for native), then auto-send, clear, and refocus the input.
  useEffect(() => {
    if (customSpeechRecognitionEnabled) {
      if (prevProcessing.current && !customSpeechRecognitionState.processing) {
        onSendPersisted.current?.();
        customSpeechRecognition.resetTranscript();
        textareaRef.current?.focus();
      }

      prevProcessing.current = customSpeechRecognitionState.processing;
    } else {
      if (prevListening.current && !reactSpeechRecognition.listening) {
        onSendPersisted.current?.();
        reactSpeechRecognition.resetTranscript();
        textareaRef.current?.focus();
      }

      prevListening.current = reactSpeechRecognition.listening;
    }
  }, [customSpeechRecognitionEnabled, reactSpeechRecognition.listening, customSpeechRecognitionState.processing]);

  // Subscribe to custom-backend state changes; onStateChange is assumed to
  // return an unsubscribe function, used as the effect cleanup.
  useEffect(() => {
    if (!customSpeechRecognitionEnabled) return undefined;

    return customSpeechRecognition.onStateChange(setCustomSpeechRecognitionState);
  }, [customSpeechRecognitionEnabled]);

  return {
    available: customSpeechRecognitionEnabled || reactSpeechRecognition.browserSupportsSpeechRecognition,
    listening: customSpeechRecognitionEnabled
      ? customSpeechRecognitionState.listening
      : reactSpeechRecognition.listening,
    // Native recognition has no post-capture processing phase.
    processing: customSpeechRecognitionEnabled ? customSpeechRecognitionState.processing : false,
    textareaRef,
    stopListening: onStopListening,
    startListening: onStartListening,
    microphoneAvailable: customSpeechRecognitionEnabled
      ? customSpeechRecognitionState.microphoneAvailable
      : reactSpeechRecognition.isMicrophoneAvailable,
  };
};
51 changes: 46 additions & 5 deletions packages/react-chat/src/components/ChatInput/index.tsx
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import cuid from 'cuid';
import { useMemo, useRef } from 'react';
import { useMemo } from 'react';

import Bubble from '@/components/Bubble';
import type { TextareaProps } from '@/components/Textarea';
import Textarea from '@/components/Textarea';
import type { ChatSpeechRecognitionConfig } from '@/dtos/ChatConfig.dto';
import { createControlled } from '@/utils/controls';

import { AudioInputButton } from './AudioInputButton';
import { useSpeechRecognition } from './hooks';
import { ButtonContainer, Container } from './styled';

export interface ChatInputProps extends TextareaProps {
Expand All @@ -14,18 +17,37 @@ export interface ChatInputProps extends TextareaProps {
*/
disableSend?: boolean | undefined;

/**
* If true, shows audio interface controls.
*/
audioInterface?: boolean | undefined;

/**
* A callback to submit the user response.
*/
onSend?: VoidFunction;

/**
* Custom speech recognition implementation.
*/
speechRecognition?: ChatSpeechRecognitionConfig;
}

const ChatInput: React.FC<ChatInputProps> = ({ id, onSend, disableSend, ...props }) => {
const ChatInput: React.FC<ChatInputProps> = ({
id,
onSend,
disableSend,
onValueChange,
audioInterface,
speechRecognition: customSpeechRecognition,
...props
}) => {
const internalID = useMemo(() => `vf-chat-input--${cuid()}`, []) ?? id;
const textareaRef = useRef<HTMLTextAreaElement>(null);
const speechRecognition = useSpeechRecognition({ onSend, onValueChange, customSpeechRecognition });

const handleKeyPress = (event: React.KeyboardEvent<HTMLTextAreaElement>): void => {
event.stopPropagation();

const { shiftKey } = event;

if (event.key !== 'Enter') return;
Expand All @@ -35,12 +57,31 @@ const ChatInput: React.FC<ChatInputProps> = ({ id, onSend, disableSend, ...props
}
};

const withSendButton = !!props.value && !disableSend && !speechRecognition.listening;
const withAudioInput =
speechRecognition.available && speechRecognition.microphoneAvailable && audioInterface && !withSendButton;

return (
<Container>
<Textarea ref={textareaRef} id={internalID} onKeyDown={handleKeyPress} {...props} />
<ButtonContainer htmlFor={internalID} ready={!!props.value && !disableSend}>
<Textarea
id={internalID}
ref={speechRecognition.textareaRef}
onKeyDown={handleKeyPress}
onValueChange={onValueChange}
{...props}
/>

<ButtonContainer htmlFor={internalID} ready={withSendButton}>
<Bubble size="small" svg="smallArrowUp" onClick={onSend} />
</ButtonContainer>

{withAudioInput && (
<AudioInputButton
onStop={speechRecognition.stopListening}
onStart={speechRecognition.startListening}
listening={speechRecognition.listening}
/>
)}
</Container>
);
};
Expand Down
Loading

0 comments on commit f1954c1

Please sign in to comment.