Commit 4b3b4d3 (1 parent: f2b80ee)

Add inline commentary and improved README to really explain this new example
File tree: 4 files changed, +109 −73 lines

code-helper/README.md

Lines changed: 26 additions & 52 deletions
@@ -1,6 +1,31 @@
 # code-helper
 
-A Pipecat AI voice agent built with a cascade pipeline (STT → LLM → TTS).
+This example demonstrates using `LLMTextProcessor` to categorize the LLM's
+output text so that the client can easily render different types of output
+accordingly, while the TTS speaks these same types in separate, custom ways:
+spelling out credit card numbers, skipping code snippets entirely, and not
+saying the 'https' part of a URL.
+
+This example also includes a text entry box in the client to show how the
+bot handles text input and can respond either with or without audio, while
+the categorization and "bot output" continue seamlessly.
+
+The client in this example will render the user and bot transcripts using
+only the `user-transcript` and `bot-output` messages. The bot output will
+render each sentence and then highlight each word as it is spoken. All code
+provided by the bot will be highlighted as such, and links will be formatted.
+
+Concepts this example is meant to demonstrate:
+- Custom handling of LLM text output for different purposes:
+  - Having the TTS skip certain outputs or speak certain outputs
+    differently
+  - Supporting a client UI by making different types of text easier to
+    render, or by altering or filtering out text before sending it to the
+    client
+- Client <-> Bot communication with RTVI
+- Tool calling for sensitive information, and custom handling of that
+  information for TTS and RTVI purposes
+- Client->Server text input
 
 ## Configuration
 
@@ -10,10 +35,6 @@ A Pipecat AI voice agent built with a cascade pipeline (STT → LLM → TTS).
 - **STT**: Deepgram
 - **LLM**: OpenAI
 - **TTS**: ElevenLabs
-- **Features**:
-  - Transcription
-  - smart-turn v3
-  - Observability (Whisker + Tail)
 
 ## Setup
 
@@ -87,53 +108,6 @@ code-helper/
 
 This project includes observability tools to help you debug and monitor your bot:
 
-### Whisker - Live Pipeline Debugger
-
-**Whisker** is a live graphical debugger that lets you visualize pipelines and debug frames in real time.
-
-With Whisker you can:
-
-- 🗺️ View a live graph of your pipeline
-- ⚡ Watch frame processors flash in real time as frames pass through them
-- 📌 Select a processor to inspect the frames it has handled
-- 🔍 Filter frames by name to quickly find the ones you care about
-- 🧵 Select a frame to trace its full path through the pipeline
-- 💾 Save and load previous sessions for review and troubleshooting
-
-**To use Whisker:**
-
-1. Run an ngrok tunnel to expose your bot:
-
-   ```bash
-   ngrok http 9090
-   ```
-
-   > Tip: Use `--subdomain` for a repeatable ngrok URL
-
-2. Navigate to [https://whisker.pipecat.ai/](https://whisker.pipecat.ai/) and enter your ngrok URL (e.g., `your-subdomain.ngrok.io`)
-
-3. Once your bot is running, press connect
-
-### Tail - Terminal Dashboard
-
-**Tail** is a terminal dashboard that lets you monitor your Pipecat sessions in real time.
-
-With Tail you can:
-
-- 📜 Follow system logs in real time
-- 💬 Track conversations as they happen
-- 🔊 Monitor user and agent audio levels
-- 📈 Keep an eye on service metrics and usage
-
-**To use Tail:**
-
-1. Run your bot (in one terminal)
-
-2. Launch Tail in another terminal:
-   ```bash
-   pipecat tail
-   ```
-
 ## Learn More
 
 - [Pipecat Documentation](https://docs.pipecat.ai/)
code-helper/client/src/app.js

Lines changed: 49 additions & 2 deletions
@@ -68,11 +68,13 @@ class VoiceChatClient {
   }
 
   setupEventListeners() {
+    // Listen for and log transport state changes
     this.transportSelect.addEventListener('change', (e) => {
      this.transportType = e.target.value;
      this.addEvent('transport-changed', this.transportType);
    });
 
+    // Set up the connect button for connecting/disconnecting
    this.connectBtn.addEventListener('click', () => {
      if (this.isConnected) {
        this.disconnect();
@@ -81,6 +83,7 @@ class VoiceChatClient {
      }
    });
 
+    // Set up the mic button for muting/unmuting
    this.micBtn.addEventListener('click', () => {
      if (this.client) {
        const newState = !this.client.isMicEnabled;
@@ -89,6 +92,7 @@ class VoiceChatClient {
      }
    });
 
+    // Handle sending text input to the LLM
    const userInput = document.getElementById('user-input');
    const sendTextToLLM = () => {
      if (this.client && this.isConnected) {
@@ -143,6 +147,9 @@ class VoiceChatClient {
        }
      },
      onBotOutput: (data) => {
+        // Check the aggregation type. If WORD, embolden the word already
+        // rendered in the bot transcript. Otherwise, add to the latest bot
+        // message or start a new one.
        if (data.aggregated_by === AggregationType.WORD) {
          this.emboldenBotWord(data.text);
          return;
@@ -176,6 +183,7 @@ class VoiceChatClient {
  }
 
  setupAudio() {
+    // Listen for bot audio tracks and play them
    this.client.on(RTVIEvent.TrackStarted, (track, participant) => {
      if (!participant?.local && track.kind === 'audio') {
        this.addEvent('track-started', 'Bot audio track');
@@ -188,6 +196,7 @@ class VoiceChatClient {
  }
 
  onConnected() {
+    // Update UI on connection
    this.isConnected = true;
    this.connectBtn.textContent = 'Disconnect';
    this.connectBtn.classList.add('disconnect');
@@ -204,6 +213,7 @@ class VoiceChatClient {
  }
 
  onDisconnected() {
+    // Update UI on disconnection
    this.isConnected = false;
    this.connectBtn.textContent = 'Connect';
    this.connectBtn.classList.remove('disconnect');
@@ -215,49 +225,70 @@ class VoiceChatClient {
  }
 
  updateMicButton(enabled) {
+    // Update the microphone button UI based on whether the mic is enabled
    this.micStatus.textContent = enabled ? 'Mic is On' : 'Mic is Off';
    this.micBtn.style.backgroundColor = enabled ? '#10b981' : '#1f2937';
  }
 
  emboldenBotWord(word) {
+    // This method does its best to find the provided word in the rendered bot
+    // transcript and embolden it. It keeps track of which bubble and index
+    // it's at to avoid searching from the start each time. It simply looks for
+    // the next occurrence of the word in the current bubble and emboldens all
+    // the text up to that word. This means it may fail if the word does not
+    // match exactly what was rendered (e.g., punctuation, casing, etc.), but
+    // it's a best effort.
    if (this.curBotSpan < 0) return;
    const curSpan = this.botSpans[this.curBotSpan];
    if (!curSpan) return;
+    // Get the inner HTML without <strong> tags
    const spanInnards = curSpan.innerHTML.replace(/<\/?strong>/g, '');
+    // Split into the already spoken (and emboldened) part and the part yet
+    // to be spoken (and not yet emboldened)
    const alreadyEmboldened = spanInnards.slice(0, this.lastBotWordIndex || 0);
    const yetToEmbolden = spanInnards.slice(this.lastBotWordIndex || 0);
 
+    // In the yet-to-embolden part, find the next occurrence of the word
    const wordIndex = yetToEmbolden.indexOf(word);
    if (wordIndex === -1) {
+      // If the word is not found, we may have finished this span;
+      // move to the next span if available
      if (this.botSpans.length > this.curBotSpan + 1) {
+        // Once we complete a span, mark it as spoken. This removes the need
+        // for inserting <strong> tags and simplifies the innerHTML.
        curSpan.innerHTML = spanInnards;
        curSpan.classList.add('spoken');
 
        // Move to next bubble
        this.curBotSpan = this.curBotSpan + 1;
        this.lastBotWordIndex = 0;
+        // Try again with the next span
        this.emboldenBotWord(word);
        return;
      }
      return;
    }
-    // Replace the first occurrence of the word with <strong>word</strong>
+    // Replace the first occurrence of the word with word</strong>
    // Use word boundaries to match the whole word
    const replaced = yetToEmbolden.replace(word, `${word}</strong>`);
 
+    // Update the inner HTML so that <strong> wraps all text up until
+    // and including the current word
    curSpan.innerHTML = '<strong>' + alreadyEmboldened + replaced;
+    // Scroll to the bottom
    this.conversationLog.scrollTop = this.conversationLog.scrollHeight;
 
    // Update lastBotWordIndex
    this.lastBotWordIndex =
      (this.lastBotWordIndex || 0) + wordIndex + word.length;
  }
 
+  // Create a new element to add to the bot bubble based on aggregation type
  createBotBubbleElement(text, type) {
    let newElement;
    switch (type) {
      case 'code':
        {
+          // Create a code block with syntax highlighting
          newElement = document.createElement('pre');
          const codeDiv = document.createElement('code');
          codeDiv.textContent = text;
@@ -267,6 +298,7 @@ class VoiceChatClient {
          break;
      case 'link':
        {
+          // Create a link element
          newElement = document.createElement('div');
          const link = document.createElement('a');
          link.href = text;
@@ -277,8 +309,10 @@ class VoiceChatClient {
          break;
      default:
        {
+          // All other text is rendered in a simple span, and newlines are converted to <br>
          newElement = document.createElement('span');
          text = text.trim();
+          // Add spaces around the <br> to ensure we don't break our emboldening logic
          newElement.innerHTML = text.replace(/\n/g, ' <br> ');
          this.botSpans.push(newElement);
          if (this.curBotSpan === -1) {
@@ -287,10 +321,12 @@ class VoiceChatClient {
          }
          break;
    }
+    // Attach the aggregation type for later reference
    newElement.type = type;
    return newElement;
  }
 
+  // Add text to the last bubble, handling different types appropriately
  addToLastBubble(text, role, type) {
    const appendText = (element, text) => {
      text = text.trim();
@@ -301,29 +337,38 @@ class VoiceChatClient {
    };
 
    if (role === 'user') {
+      // If the role is user, always simply append the text and return.
+      // There is no special rendering for user messages.
      appendText(this.lastConversationBubble, text);
      return;
    }
 
+    // For bot messages, if the last element is text and the new type is also
+    // text, we can simply append to it.
    const lastChild = this.lastConversationBubble.lastChild;
    if (lastChild && typeIsText(lastChild.type) && typeIsText(type)) {
      appendText(lastChild, text);
      return;
    }
+    // If we're here, the text is part of the bot transcript and is either not
+    // plain text or a different type than the last element. Create a new
+    // element to add to the bot transcript bubble.
    this.lastConversationBubble.appendChild(
      this.createBotBubbleElement(text, type)
    );
  }
 
+  // Entry point for adding text to the conversation log
  addConversationMessage(text, role, type = AggregationType.SENTENCE) {
-    // Only start a new bubble if the role changes
+    // If the role changes, create a new bubble. Otherwise, add to the last bubble.
    if (this.lastConversationBubble?.role === role) {
      this.addToLastBubble(text, role, type);
    } else {
      this.createConversationBubble(text, role, type);
    }
  }
 
+  // Create a new conversation bubble along with its initial text
  createConversationBubble(text, role, type) {
    const messageDiv = document.createElement('div');
    messageDiv.className = `conversation-message ${role} ${type}`;
@@ -340,6 +385,8 @@ class VoiceChatClient {
    this.conversationLog.scrollTop = this.conversationLog.scrollHeight;
  }
 
+  // The client UI also has an event log for debugging and observability.
+  // The method below adds entries to that log.
  addEvent(eventName, data) {
    const eventDiv = document.createElement('div');
    eventDiv.className = 'event-entry';
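The index bookkeeping inside `emboldenBotWord` can be distilled into a pure string helper for clarity. This is an illustrative simplification, not the example's code: `emboldenNext` is a hypothetical name, and the real method additionally walks across spans, marks finished spans as spoken, and writes the result back to the DOM.

```javascript
// Hypothetical sketch: given a span's plain text, the index already covered
// by <strong>, and the next spoken word, return the new innerHTML and the
// advanced index, or null when the word is not found in the remaining text.
function emboldenNext(spanText, lastIndex, word) {
  const alreadyEmboldened = spanText.slice(0, lastIndex);
  const yetToEmbolden = spanText.slice(lastIndex);
  const wordIndex = yetToEmbolden.indexOf(word);
  if (wordIndex === -1) return null; // caller would move on to the next span

  // One opening <strong> plus a single close after the spoken word means the
  // tag wraps everything up to and including that word.
  const replaced = yetToEmbolden.replace(word, `${word}</strong>`);
  return {
    html: '<strong>' + alreadyEmboldened + replaced,
    lastIndex: lastIndex + wordIndex + word.length,
  };
}
```

Keeping the transformation pure like this makes the tricky part (advancing `lastIndex` past the matched word so later searches never rescan emboldened text) easy to unit-test without a DOM.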
