Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,6 @@
"ENABLE_SPOTLIGHT": false,
"AUTO_CONNECT_RTM": false,
"ENABLE_CONVERSATIONAL_AI": false,
"CUSTOMIZE_AGENT": false
"CUSTOMIZE_AGENT": false,
"SONIOX_API_KEY": ""
}
110 changes: 110 additions & 0 deletions template/bridge/rtc/webNg/RtcEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import type {
Subscription,
} from 'react-native-agora/lib/typescript/src/common/RtcEvents';

import {RecordTranscribe} from '@soniox/speech-to-text-web';

import {IRtcEngine} from 'react-native-agora';
import {VideoProfile} from '../quality';
import {ChannelProfileType, ClientRoleType} from '../../../agora-rn-uikit';
Expand Down Expand Up @@ -222,6 +224,7 @@ export default class RtcEngine {
// public AgoraRTC: any;
public client: IAgoraRTCClient;
public screenClient: any | IAgoraRTCClient;

public eventsMap = new Map<string, callbackType>([
['onUserJoined', () => null],
['onUserOffline', () => null],
Expand All @@ -232,10 +235,13 @@ export default class RtcEngine {
['onNetworkQuality', () => null],
['onActiveSpeaker', () => null],
['onStreamMessage', () => null],
['onSonioxTranscriptionResult', () => null],
]);

public localStream: LocalStream = {};
public screenStream: ScreenStream = {};
public remoteStreams = new Map<UID, RemoteStream>();
public isSonioxPanelOpen = false;
private inScreenshare: Boolean = false;
private videoProfile:
| VideoEncoderConfigurationPreset
Expand All @@ -255,13 +261,101 @@ export default class RtcEngine {
private muteLocalAudioMutex = false;
private speakerDeviceId = '';
private usersVolumeLevel = [];

// Create channel profile and set it here

/**
 * Stores the Agora app id from the init context and resets all
 * Soniox-related bookkeeping (transcriber map, custom event listeners,
 * local uid) so a re-initialized engine starts from a clean slate.
 */
initialize(context: RtcEngineContext) {
  logger.log(LogSource.AgoraSDK, 'Log', 'RTC engine initialized');
  this.appId = context.appId;
  // Fresh per-session transcription state.
  this.sonioxTranscribers = new Map();
  this.customEvents = new Map();
  this.localUserId = null;
}
/** Registers (or replaces) the callback invoked for a custom engine event. */
addCustomListener(eventName: string, callback: (...args: any[]) => void) {
  this.customEvents.set(eventName, callback);
}

/** Removes a previously registered custom event callback, if any. */
removeCustomListener(eventName: string) {
  this.customEvents.delete(eventName);
}

/**
 * Starts a realtime Soniox transcription session for one user's audio.
 *
 * @param uid     Agora uid whose audio should be transcribed.
 * @param apiKey  Soniox API key used to authorize the session.
 * @param isLocal true to capture the local mic track, false to use the
 *                remote stream previously stored for `uid`.
 *
 * Partial results are forwarded to the 'onSonioxTranscriptionResult'
 * custom listener; the transcriber is tracked in `sonioxTranscribers` so
 * `stopSonioxTranscription` can tear it down. No-ops (with a log) when no
 * audio track is available for the requested uid.
 */
async startSonioxTranscription(uid: UID, apiKey: string, isLocal: boolean) {
  // Guard against duplicate sessions: callers (panel mount, the
  // user-published handler, mute toggling) can race, and each start opens
  // a separate remote transcription session.
  if (this.sonioxTranscribers.has(uid)) {
    return;
  }

  let stream: MediaStream | null = null;

  // Select local or remote stream
  if (isLocal) {
    this.localUserId = uid;
    if (!this.localStream?.audio) {
      console.log('No local audio stream available', uid);
      return;
    }
    stream = new MediaStream([this.localStream.audio.getMediaStreamTrack()]);
  } else {
    const remoteAudio = this.remoteStreams.get(uid)?.audio;
    if (!remoteAudio) {
      console.warn(`No remote audio stream found for UID ${uid}`);
      return;
    }
    stream = new MediaStream([remoteAudio.getMediaStreamTrack()]);
  }

  // Create a new transcriber instance
  const transcriber = new RecordTranscribe({apiKey});

  // Register BEFORE awaiting start() so a concurrent call for the same
  // uid hits the guard above instead of opening a second session.
  this.sonioxTranscribers.set(uid, transcriber);

  try {
    // Start transcription for the single stream
    await transcriber.start({
      model: 'stt-rt-preview',
      stream,
      languageHints: ['en'],
      sampleRate: 48000,
      numChannels: 1,
      enableLanguageIdentification: false,
      enableEndpointDetection: false,
      // translation: {
      //   type: 'one_way',
      //   source_languages: ['en'],
      //   target_language: 'hi',
      // },
      onPartialResult: results => {
        // Fan results out through the engine's custom-event bridge.
        const callback = this.customEvents.get('onSonioxTranscriptionResult');
        if (callback) callback(uid, {uid, ...results});
      },
      onError: (status, message, code) => {
        console.error(
          `Soniox Transcription Error (${uid}):`,
          status,
          message,
          code,
        );
      },
      onStarted: () => {
        console.log(`Soniox started transcription for UID: ${uid}`);
      },
      onStateChange: ({oldState, newState}) => {
        console.log(`Soniox state (${uid}): ${oldState} → ${newState}`);
      },
      onFinished: () => {
        console.log(` Soniox transcription session finished for UID: ${uid}`);
      },
    });
  } catch (e) {
    // Roll back the registration so a later retry is possible, and surface
    // the failure instead of leaving an unhandled rejection (most callers
    // do not await this method).
    this.sonioxTranscribers.delete(uid);
    console.error(`Failed to start Soniox transcription for UID ${uid}:`, e);
  }
}

stopSonioxTranscription(): void {
for (const [uid, transcriber] of this.sonioxTranscribers.entries()) {
transcriber.stop();
console.log(` Stopped Soniox transcription for user UID: ${uid}`);
}
this.sonioxTranscribers.clear();
}

getLocalVideoStats() {
try {
logger.log(
Expand Down Expand Up @@ -771,6 +865,14 @@ export default class RtcEngine {
0,
0,
);
// Only start transcriber if panel is open & not already started
if (this.isSonioxPanelOpen && !this.sonioxTranscribers.has(user.uid)) {
this.startSonioxTranscription(
user.uid,
$config.SONIOX_API_KEY,
false,
);
}
} else {
const videoTrack = user.videoTrack;
// Play the video
Expand Down Expand Up @@ -1022,6 +1124,14 @@ export default class RtcEngine {
// Release the lock once done
this.muteLocalAudioMutex = false;
this.isAudioEnabled = !muted;

// Stop/ Start Local Transcriber on local mute/unmute
const transcriber = this.sonioxTranscribers.get(this.localUserId);
if (muted) {
await transcriber.stop();
} else {
await transcriber.start(transcriber._audioOptions);
}
// Unpublish only after when the user has joined the call
if (!muted && !this.isAudioPublished && this.isJoined) {
logger.log(
Expand Down
1 change: 1 addition & 0 deletions template/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"@react-native-async-storage/async-storage": "1.19.2",
"@react-native-community/checkbox": "0.5.16",
"@react-native-community/clipboard": "1.5.1",
"@soniox/speech-to-text-web": "^1.1.4",
"@splinetool/runtime": "^1.9.69",
"@supersami/rn-foreground-service": "^1.1.1",
"add": "^2.0.6",
Expand Down
2 changes: 2 additions & 0 deletions template/src/pages/video-call/VideoCallScreen.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ import {useIsRecordingBot} from '../../subComponents/recording/useIsRecordingBot
import {ToolbarPresetProps} from '../../atoms/ToolbarPreset';
import CustomSidePanelView from '../../components/CustomSidePanel';
import {useControlPermissionMatrix} from '../../components/controls/useControlPermissionMatrix';
import SonixCaptionContainer from '../../subComponents/caption/SonixCaptionContainer';

const VideoCallScreen = () => {
useFindActiveSpeaker();
Expand Down Expand Up @@ -454,6 +455,7 @@ const VideoCallScreen = () => {
/>
) : (
<>
{isCaptionON ? <SonixCaptionContainer /> : <></>}
{isCaptionON ? <CaptionComponent /> : <></>}
<Spacer size={$config.ENABLE_CONVERSATIONAL_AI ? 20 : 10} />
<View
Expand Down
182 changes: 182 additions & 0 deletions template/src/subComponents/caption/SonixCaptionContainer.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// @ts-nocheck
import {StyleSheet, Text, View, ScrollView} from 'react-native';
import React, {useEffect, useRef, useState} from 'react';
import ThemeConfig from '../../theme';
import {CAPTION_CONTAINER_HEIGHT} from '../../components/CommonStyles';
import {useRtc, useContent, useLocalUid, useCaption} from 'customization-api';
import PQueue from 'p-queue';

/**
 * Renders an epoch-ms timestamp as the locale's 12-hour "hh:mm AM/PM"
 * string for display next to each caption line.
 */
const formatTime = (timestamp: number) =>
  new Date(timestamp).toLocaleTimeString([], {
    hour: '2-digit',
    minute: '2-digit',
    hour12: true,
  });

/**
 * Live auto-scrolling caption feed driven by Soniox realtime transcription.
 *
 * On mount: marks the engine's caption panel as open (so the engine can
 * auto-start transcribers for users who publish later), subscribes to the
 * 'onSonioxTranscriptionResult' custom event, and starts one transcriber per
 * currently-active uid. On unmount: clears the flag and stops all
 * transcribers.
 */
const SonixCaptionContainer = () => {
  const {RtcEngineUnsafe} = useRtc();
  const {defaultContent, activeUids} = useContent();
  const localUid = useLocalUid();
  // Committed (fully finalized) caption lines, shared via caption context.
  const {captionFeed, setCaptionFeed} = useCaption();
  const scrollRef = React.useRef<ScrollView>(null);
  // Serializes transcript callbacks so feed updates apply in arrival order.
  const queueRef = React.useRef(new PQueue({concurrency: 1}));
  const [autoScroll, setAutoScroll] = useState(true);

  // in-progress captions per speaker now
  // Keyed by uid; entries are mutated in place (see note in callback below).
  const activeCaptionsRef = useRef({});

  const engine = RtcEngineUnsafe;

  useEffect(() => {
    engine.isSonioxPanelOpen = true;

    engine.addCustomListener(
      'onSonioxTranscriptionResult',
      sonixCaptionCallback,
    );

    // Start a transcriber for every user already in the call; the local
    // user's mic track is flagged via the third argument.
    // NOTE(review): the empty dependency array below means `activeUids` is
    // the mount-time snapshot; users joining later are covered only by the
    // engine's isSonioxPanelOpen auto-start path — confirm that is intended.
    activeUids.map(uid => {
      engine.startSonioxTranscription(
        uid,
        $config.SONIOX_API_KEY,
        uid === localUid,
      );
    });

    return () => {
      engine.isSonioxPanelOpen = false;
      engine.stopSonioxTranscription();
    };
  }, []);

  // Merges one Soniox result batch for `uid` into the caption state.
  // Work is pushed through the single-concurrency queue so batches from
  // multiple speakers cannot interleave mid-update.
  const sonixCaptionCallback = (uid, transcript) => {
    const queueCallback = () => {
      console.log('sonix transcript =>', uid, transcript);

      // Final tokens are stable; non-final tokens may still be revised by
      // the recognizer, so the two are accumulated separately.
      const finalText = transcript.tokens
        .filter(t => t.is_final)
        .map(t => t.text)
        .join('');
      const nonFinalText = transcript.tokens
        .filter(t => !t.is_final)
        .map(t => t.text)
        .join('');

      // merge into in-progress buffer
      const active = activeCaptionsRef.current[uid] || {
        uid,
        text: '',
        nonFinal: '',
        time: Date.now(),
      };

      if (finalText) {
        active.text = (active.text + ' ' + finalText).trim();
      }
      active.nonFinal = nonFinalText;
      active.time = Date.now();
      activeCaptionsRef.current[uid] = active;

      // If fully finalized, commit to feed + remove from active buffer
      if (!nonFinalText && finalText) {
        setCaptionFeed(prev => [...prev, {...active, nonFinal: ''}]);
        delete activeCaptionsRef.current[uid];
      } else {
        // partial update: force rerender by setting dummy feed (not needed in your hook-based context)
        // NOTE(review): cloning the feed solely to trigger a render is a
        // workaround — mutating the ref alone would not re-render.
        setCaptionFeed(prev => [...prev]); // triggers UI refresh
      }
    };

    queueRef.current.add(queueCallback);
  };

  // Disables auto-scroll once the user scrolls away from the bottom edge
  // (20px tolerance); re-enables when they return to the bottom.
  const handleScroll = event => {
    const {layoutMeasurement, contentOffset, contentSize} = event.nativeEvent;
    const isAtBottom =
      layoutMeasurement.height + contentOffset.y >= contentSize.height - 20;
    setAutoScroll(isAtBottom);
  };

  return (
    <ScrollView
      style={styles.scrollContainer}
      contentContainerStyle={styles.container}
      ref={scrollRef}
      showsVerticalScrollIndicator={true}
      onScroll={handleScroll}
      scrollEventThrottle={16}
      onContentSizeChange={() => {
        if (autoScroll) {
          scrollRef.current?.scrollToEnd({animated: true});
        }
      }}>
      {/* Show committed lines */}
      {/* NOTE(review): array-index keys are fragile if the feed is ever
          pruned or reordered — consider a stable id per entry. */}
      {captionFeed.map((entry, index) => (
        <Text key={`feed-${index}`} style={styles.captionLine}>
          <Text style={styles.uid}>
            {defaultContent[entry.uid]?.name} ({formatTime(entry.time)}) :
          </Text>
          <Text style={styles.content}> {entry.text}</Text>
        </Text>
      ))}

      {/* Show all active speakers */}
      {Object.values(activeCaptionsRef.current)
        .filter(entry => entry.text || entry.nonFinal)
        .map((entry, index) => (
          <Text key={`active-${index}`} style={styles.captionLine}>
            <Text style={styles.uid}>
              {defaultContent[entry.uid]?.name} ({formatTime(entry.time)}) :
            </Text>
            <Text style={styles.content}> {entry.text}</Text>
            {entry.nonFinal && (
              <Text style={styles.live}> {entry.nonFinal}</Text>
            )}
          </Text>
        ))}
    </ScrollView>
  );
};

export default SonixCaptionContainer;

// Static styles for the caption panel.
const styles = StyleSheet.create({
  // Outer scroll area pinned to the shared caption-container height.
  scrollContainer: {
    maxHeight: CAPTION_CONTAINER_HEIGHT,
    height: CAPTION_CONTAINER_HEIGHT,
    backgroundColor: '#815f46',
    borderRadius: ThemeConfig.BorderRadius.small,
    marginTop: $config.ICON_TEXT ? 8 : 0,
    // NOTE(review): overflowY is a web-only style property — confirm it is
    // safely ignored on native platforms.
    overflowY: 'scroll',
  },
  container: {
    padding: 12,
    flexGrow: 1,
  },
  // One caption row: wraps speaker name, committed text, and live text.
  captionLine: {
    flexDirection: 'row',
    flexWrap: 'wrap',
    marginBottom: 4,
    flexShrink: 1,
    lineHeight: 24,
  },
  // Speaker name + timestamp prefix.
  uid: {
    color: 'orange',
    fontWeight: 'bold',
    fontSize: 18,
    lineHeight: 24,
  },
  // Finalized transcript text.
  content: {
    color: 'white',
    fontSize: 18,
    flexShrink: 1,
    lineHeight: 24,
  },
  // Still-changing (non-final) transcript text.
  live: {
    color: 'skyblue',
    fontSize: 18,
    lineHeight: 24,
  },
});
Loading