@@ -68,11 +68,13 @@ class VoiceChatClient {
6868 }
6969
7070 setupEventListeners ( ) {
71+ // Listen and log transport state changes
7172 this . transportSelect . addEventListener ( 'change' , ( e ) => {
7273 this . transportType = e . target . value ;
7374 this . addEvent ( 'transport-changed' , this . transportType ) ;
7475 } ) ;
7576
77+ // Setup connect button for connecting/disconnecting
7678 this . connectBtn . addEventListener ( 'click' , ( ) => {
7779 if ( this . isConnected ) {
7880 this . disconnect ( ) ;
@@ -81,6 +83,7 @@ class VoiceChatClient {
8183 }
8284 } ) ;
8385
86+ // Setup mic button for muting/unmuting
8487 this . micBtn . addEventListener ( 'click' , ( ) => {
8588 if ( this . client ) {
8689 const newState = ! this . client . isMicEnabled ;
@@ -89,6 +92,7 @@ class VoiceChatClient {
8992 }
9093 } ) ;
9194
95+ // Handle sending text input to LLM
9296 const userInput = document . getElementById ( 'user-input' ) ;
9397 const sendTextToLLM = ( ) => {
9498 if ( this . client && this . isConnected ) {
@@ -143,6 +147,9 @@ class VoiceChatClient {
143147 }
144148 } ,
145149 onBotOutput : ( data ) => {
150+ // Check the aggregation type. If WORD, embolden the word already rendered
151+ // in the bot transcript. Otherwise, add to the latest bot message or start
152+ // a new one.
146153 if ( data . aggregated_by === AggregationType . WORD ) {
147154 this . emboldenBotWord ( data . text ) ;
148155 return ;
@@ -176,6 +183,7 @@ class VoiceChatClient {
176183 }
177184
178185 setupAudio ( ) {
186+ // Listen for bot audio tracks and play them
179187 this . client . on ( RTVIEvent . TrackStarted , ( track , participant ) => {
180188 if ( ! participant ?. local && track . kind === 'audio' ) {
181189 this . addEvent ( 'track-started' , 'Bot audio track' ) ;
@@ -188,6 +196,7 @@ class VoiceChatClient {
188196 }
189197
190198 onConnected ( ) {
199+ // Update UI on connection
191200 this . isConnected = true ;
192201 this . connectBtn . textContent = 'Disconnect' ;
193202 this . connectBtn . classList . add ( 'disconnect' ) ;
@@ -204,6 +213,7 @@ class VoiceChatClient {
204213 }
205214
206215 onDisconnected ( ) {
216+ // Update UI on disconnection
207217 this . isConnected = false ;
208218 this . connectBtn . textContent = 'Connect' ;
209219 this . connectBtn . classList . remove ( 'disconnect' ) ;
@@ -215,49 +225,70 @@ class VoiceChatClient {
215225 }
216226
217227 updateMicButton ( enabled ) {
228+ // Update the microphone button UI based on whether the mic is enabled
218229 this . micStatus . textContent = enabled ? 'Mic is On' : 'Mic is Off' ;
219230 this . micBtn . style . backgroundColor = enabled ? '#10b981' : '#1f2937' ;
220231 }
221232
222233 emboldenBotWord ( word ) {
234+ // This method does it's best to find the word provided in the rendered bot
235+ // transcript and embolden it. It keeps track of which bubble and index
236+ // it's at to avoid searching from the start each time. It simply looks for
237+ // the next occurrence of the word in the current bubble and emboldens all the
238+ // text up to that word. This means it may fail if the word does not
239+ // match exactly what was rendered (e.g., punctuation, casing, etc), but
240+ // it's a best effort.
223241 if ( this . curBotSpan < 0 ) return ;
224242 const curSpan = this . botSpans [ this . curBotSpan ] ;
225243 if ( ! curSpan ) return ;
244+ // Get the inner HTML without <strong> tags
226245 const spanInnards = curSpan . innerHTML . replace ( / < \/ ? s t r o n g > / g, '' ) ;
246+ // Split into already spoken (and emboldened) and yet to be spoken (and emboldened)
227247 const alreadyEmboldened = spanInnards . slice ( 0 , this . lastBotWordIndex || 0 ) ;
228248 const yetToEmbolden = spanInnards . slice ( this . lastBotWordIndex || 0 ) ;
229249
250+ // For the yet to embolden part, find the next occurrence of the word
230251 const wordIndex = yetToEmbolden . indexOf ( word ) ;
231252 if ( wordIndex === - 1 ) {
253+ // If the word is not found, we may have finished this span
254+ // move to the next span if available
232255 if ( this . botSpans . length > this . curBotSpan + 1 ) {
256+ // Once we complete a span, mark it as spoken. This removes the need
257+ // for inserting <strong> tags and simplifies the innerHTML.
233258 curSpan . innerHTML = spanInnards ;
234259 curSpan . classList . add ( 'spoken' ) ;
235260
236261 // Move to next bubble
237262 this . curBotSpan = this . curBotSpan + 1 ;
238263 this . lastBotWordIndex = 0 ;
264+ // Try again with the next span
239265 this . emboldenBotWord ( word ) ;
240266 return ;
241267 }
242268 return ;
243269 }
244- // Replace the first occurrence of the word with <strong> word</strong>
270+ // Replace the first occurrence of the word with word</strong>
245271 // Use word boundaries to match the whole word
246272 const replaced = yetToEmbolden . replace ( word , `${ word } </strong>` ) ;
247273
274+ // Update the inner HTML so that <strong> wraps all text up until
275+ // and including the current word
248276 curSpan . innerHTML = '<strong>' + alreadyEmboldened + replaced ;
277+ // Scroll to bottom
249278 this . conversationLog . scrollTop = this . conversationLog . scrollHeight ;
250279
251280 // Update lastBotWordIndex
252281 this . lastBotWordIndex =
253282 ( this . lastBotWordIndex || 0 ) + wordIndex + word . length ;
254283 }
255284
285+ // Create a new element to add to the bot bubble based on aggregation type
256286 createBotBubbleElement ( text , type ) {
257287 let newElement ;
258288 switch ( type ) {
259289 case 'code' :
260290 {
291+ // Create a code block with syntax highlighting
261292 newElement = document . createElement ( 'pre' ) ;
262293 const codeDiv = document . createElement ( 'code' ) ;
263294 codeDiv . textContent = text ;
@@ -267,6 +298,7 @@ class VoiceChatClient {
267298 break ;
268299 case 'link' :
269300 {
301+ // Create a link element
270302 newElement = document . createElement ( 'div' ) ;
271303 const link = document . createElement ( 'a' ) ;
272304 link . href = text ;
@@ -277,8 +309,10 @@ class VoiceChatClient {
277309 break ;
278310 default :
279311 {
312+ // All other text is rendered in a simple span and new lines are converted to <br>
280313 newElement = document . createElement ( 'span' ) ;
281314 text = text . trim ( ) ;
315+ // We add spaces around the <br> to ensure we don't break our emboldening logic
282316 newElement . innerHTML = text . replace ( / \n / g, ' <br> ' ) ;
283317 this . botSpans . push ( newElement ) ;
284318 if ( this . curBotSpan === - 1 ) {
@@ -287,10 +321,12 @@ class VoiceChatClient {
287321 }
288322 break ;
289323 }
324+ // Attach the aggregation type for later reference
290325 newElement . type = type ;
291326 return newElement ;
292327 }
293328
329+ // Add text to the last bubble, handling different types appropriately
294330 addToLastBubble ( text , role , type ) {
295331 const appendText = ( element , text ) => {
296332 text = text . trim ( ) ;
@@ -301,29 +337,38 @@ class VoiceChatClient {
301337 } ;
302338
303339 if ( role === 'user' ) {
340+ // If the role is user, always simply append the text and return.
341+ // There is no special rendering for user messages.
304342 appendText ( this . lastConversationBubble , text ) ;
305343 return ;
306344 }
307345
346+ // For bot messages, if the last element is text and the new type is also text,
347+ // we can simply append to it.
308348 const lastChild = this . lastConversationBubble . lastChild ;
309349 if ( lastChild && typeIsText ( lastChild . type ) && typeIsText ( type ) ) {
310350 appendText ( lastChild , text ) ;
311351 return ;
312352 }
353+ // If we're here, then the text is part of the bot transcript and either not
354+ // text or a different type than the last element. Create a new element to add
355+ // to the bot transcript bubble.
313356 this . lastConversationBubble . appendChild (
314357 this . createBotBubbleElement ( text , type )
315358 ) ;
316359 }
317360
361+ // Entry point for adding text to the conversation log
318362 addConversationMessage ( text , role , type = AggregationType . SENTENCE ) {
319- // Only start a new bubble if the role changes
363+ // If the role changes, create a new bubble. Otherwise, add to the last bubble.
320364 if ( this . lastConversationBubble ?. role === role ) {
321365 this . addToLastBubble ( text , role , type ) ;
322366 } else {
323367 this . createConversationBubble ( text , role , type ) ;
324368 }
325369 }
326370
371+ // Create a new conversation bubble along with its initial text
327372 createConversationBubble ( text , role , type ) {
328373 const messageDiv = document . createElement ( 'div' ) ;
329374 messageDiv . className = `conversation-message ${ role } ${ type } ` ;
@@ -340,6 +385,8 @@ class VoiceChatClient {
340385 this . conversationLog . scrollTop = this . conversationLog . scrollHeight ;
341386 }
342387
388+ // The client UI also has an event log for debugging and observability.
389+ // The method below adds entries to that log.
343390 addEvent ( eventName , data ) {
344391 const eventDiv = document . createElement ( 'div' ) ;
345392 eventDiv . className = 'event-entry' ;
0 commit comments