feat(run/chat): display token usage

doringeman · doringeman · commit f5b50e4e83f1 · 2025-09-19T12:47:03.000+03:00
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
diff --git a/commands/run.go b/commands/run.go
@@ -271,7 +271,7 @@ func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model
 		// Simple case: just stream as plain text
 		return client.Chat(backend, model, prompt, apiKey, func(content string) {
 			cmd.Print(content)
-		})
+		}, false)
 	}
 
 	// For markdown: use streaming buffer to render code blocks as they complete
@@ -289,7 +289,7 @@ func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model
 		} else if rendered != "" {
 			cmd.Print(rendered)
 		}
-	})
+	}, true)
 	if err != nil {
 		return err
 	}
diff --git a/desktop/api.go b/desktop/api.go
@@ -40,4 +40,9 @@ type OpenAIChatResponse struct {
 		Index        int    `json:"index"`
 		FinishReason string `json:"finish_reason"`
 	} `json:"choices"`
+	Usage *struct {
+		CompletionTokens int `json:"completion_tokens"`
+		PromptTokens     int `json:"prompt_tokens"`
+		TotalTokens      int `json:"total_tokens"`
+	} `json:"usage,omitempty"`
 }
diff --git a/desktop/desktop.go b/desktop/desktop.go
@@ -365,7 +365,7 @@ func (c *Client) fullModelID(id string) (string, error) {
 }
 
 // Chat performs a chat request and streams the response content with selective markdown rendering.
-func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(string)) error {
+func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(string), shouldUseMarkdown bool) error {
 	model = normalizeHuggingFaceModelName(model)
 	if !strings.Contains(strings.Trim(model, "/"), "/") {
 		// Do an extra API call to check if the model parameter isn't a model ID.
@@ -422,7 +422,14 @@ func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(str
 	)
 
 	printerState := chatPrinterNone
-	reasoningFmt := color.New(color.FgWhite).Add(color.Italic)
+	reasoningFmt := color.New().Add(color.Italic)
+
+	var finalUsage *struct {
+		CompletionTokens int `json:"completion_tokens"`
+		PromptTokens     int `json:"prompt_tokens"`
+		TotalTokens      int `json:"total_tokens"`
+	}
+
 	scanner := bufio.NewScanner(resp.Body)
 	for scanner.Scan() {
 		line := scanner.Text()
@@ -445,6 +452,10 @@ func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(str
 			return fmt.Errorf("error parsing stream response: %w", err)
 		}
 
+		if streamResp.Usage != nil {
+			finalUsage = streamResp.Usage
+		}
+
 		if len(streamResp.Choices) > 0 {
 			if streamResp.Choices[0].Delta.ReasoningContent != "" {
 				chunk := streamResp.Choices[0].Delta.ReasoningContent
@@ -454,14 +465,14 @@ func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(str
 				if printerState != chatPrinterReasoning {
 					const thinkingHeader = "Thinking:\n"
 					if reasoningFmt != nil {
-						outputFunc(reasoningFmt.Sprint(thinkingHeader))
+						reasoningFmt.Print(thinkingHeader)
 					} else {
 						outputFunc(thinkingHeader)
 					}
 				}
 				printerState = chatPrinterReasoning
 				if reasoningFmt != nil {
-					outputFunc(reasoningFmt.Sprint(chunk))
+					reasoningFmt.Print(chunk)
 				} else {
 					outputFunc(chunk)
 				}
@@ -481,6 +492,19 @@ func (c *Client) Chat(backend, model, prompt, apiKey string, outputFunc func(str
 		return fmt.Errorf("error reading response stream: %w", err)
 	}
 
+	if finalUsage != nil {
+		usageInfo := fmt.Sprintf("\n\nToken usage: %d prompt + %d completion = %d total",
+			finalUsage.PromptTokens,
+			finalUsage.CompletionTokens,
+			finalUsage.TotalTokens)
+
+		usageFmt := color.New(color.FgHiBlack)
+		if !shouldUseMarkdown {
+			usageFmt.DisableColor()
+		}
+		outputFunc(usageFmt.Sprint(usageInfo))
+	}
+
 	return nil
 }
 
diff --git a/desktop/desktop_test.go b/desktop/desktop_test.go
@@ -63,7 +63,7 @@ func TestChatHuggingFaceModel(t *testing.T) {
 		Body:       io.NopCloser(bytes.NewBufferString("data: {\"choices\":[{\"delta\":{\"content\":\"Hello there!\"}}]}\n")),
 	}, nil)
 
-	err := client.Chat("", modelName, prompt, "", func(s string) {})
+	err := client.Chat("", modelName, prompt, "", func(s string) {}, false)
 	assert.NoError(t, err)
 }
 

Original file line number	Diff line number	Diff line change
`@@ -271,7 +271,7 @@ func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model`
`271`	`271`	`// Simple case: just stream as plain text`
`272`	`272`	`return client.Chat(backend, model, prompt, apiKey, func(content string) {`
`273`	`273`	`cmd.Print(content)`
`274`		`- })`
	`274`	`+ }, false)`
`275`	`275`	`}`
`276`	`276`
`277`	`277`	`// For markdown: use streaming buffer to render code blocks as they complete`
`@@ -289,7 +289,7 @@ func chatWithMarkdown(cmd *cobra.Command, client *desktop.Client, backend, model`
`289`	`289`	`} else if rendered != "" {`
`290`	`290`	`cmd.Print(rendered)`
`291`	`291`	`}`
`292`		`- })`
	`292`	`+ }, true)`
`293`	`293`	`if err != nil {`
`294`	`294`	`return err`
`295`	`295`	`}`
Original file line number	Diff line number	Diff line change
`@@ -63,7 +63,7 @@ func TestChatHuggingFaceModel(t *testing.T) {`
`63`	`63`	`Body: io.NopCloser(bytes.NewBufferString("data: {\"choices\":[{\"delta\":{\"content\":\"Hello there!\"}}]}\n")),`
`64`	`64`	`}, nil)`
`65`	`65`
`66`		`- err := client.Chat("", modelName, prompt, "", func(s string) {})`
	`66`	`+ err := client.Chat("", modelName, prompt, "", func(s string) {}, false)`
`67`	`67`	`assert.NoError(t, err)`
`68`	`68`	`}`
`69`	`69`