Inferact · BugenZhao · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/rustfmt.toml b/rustfmt.toml
@@ -1 +1,3 @@
 style_edition = "2024"
+chain_width = 80
+use_field_init_shorthand = true
diff --git a/rustfmt.unstable.toml b/rustfmt.unstable.toml
@@ -2,9 +2,13 @@
 # Apply manually with:
 #   cargo +nightly fmt -- --config-path rustfmt.unstable.toml
 
-unstable_features = true
 style_edition = "2024"
-comment_width = 100
+chain_width = 80
+use_field_init_shorthand = true
+
+# Unstable features go here.
+unstable_features = true
+
 format_code_in_doc_comments = true
 format_macro_matchers = true
 normalize_comments = true

diff --git a/src/chat/examples/external_engine_chat_qwen.rs b/src/chat/examples/external_engine_chat_qwen.rs
@@ -96,10 +96,7 @@ async fn main() -> Result<()> {
     println!("request_id={request_id}");
     println!("prompt={}", args.prompt);
 
-    let mut stream = chat
-        .chat(request)
-        .await
-        .context("failed to submit chat request")?;
+    let mut stream = chat.chat(request).await.context("failed to submit chat request")?;
     let output = tokio::time::timeout(output_timeout, async {
         let mut final_reasoning = String::new();
         let mut final_text = String::new();
@@ -170,9 +167,7 @@ async fn main() -> Result<()> {
     .await
     .context("timed out waiting for chat output")??;
 
-    chat.shutdown()
-        .await
-        .context("failed to shut down chat client")?;
+    chat.shutdown().await.context("failed to shut down chat client")?;
 
     println!("final_reasoning={:?}", output.0);
     println!("final_text={:?}", output.1);

diff --git a/src/chat/src/backend/mod.rs b/src/chat/src/backend/mod.rs
@@ -25,7 +25,8 @@ pub trait ChatBackend: Send + Sync {
     /// Return the renderer used for chat-prompt construction.
     fn chat_renderer(&self) -> DynChatRenderer;
 
-    /// Create a request-scoped output processor after request-level adjustments are applied.
+    /// Create a request-scoped output processor after request-level adjustments
+    /// are applied.
     fn new_chat_output_processor(
         &self,
         request: &mut ChatRequest,
@@ -36,10 +37,12 @@ pub trait ChatBackend: Send + Sync {
 /// Shared trait-object form of [`ChatBackend`].
 pub type DynChatBackend = Arc<dyn ChatBackend>;
 
-/// Convenience trait for backends that can serve both raw text generation and chat templating.
+/// Convenience trait for backends that can serve both raw text generation and
+/// chat templating.
 ///
-/// This is mainly useful in tests and small examples, where one mock/backend often implements
-/// both sides and callers want `ChatLlm` to wire the shared object into `TextLlm` automatically.
+/// This is mainly useful in tests and small examples, where one mock/backend
+/// often implements both sides and callers want `ChatLlm` to wire the shared
+/// object into `TextLlm` automatically.
 pub trait ChatTextBackend: ChatBackend + TextBackend {}
 
 impl<T> ChatTextBackend for T where T: ChatBackend + TextBackend + ?Sized {}
@@ -54,11 +57,11 @@ pub struct LoadModelBackendsOptions {
     pub renderer: RendererSelection,
     /// How to serialize `message.content` when rendering the chat template.
     pub chat_template_content_format: ChatTemplateContentFormatOption,
-    /// Optional server-default chat template override, provided either as an inline template or
-    /// as a path to a template file.
+    /// Optional server-default chat template override, provided either as an
+    /// inline template or as a path to a template file.
     pub chat_template: Option<String>,
-    /// Optional server-default keyword arguments merged into every chat-template render before
-    /// request-level `chat_template_kwargs`.
+    /// Optional server-default keyword arguments merged into every
+    /// chat-template render before request-level `chat_template_kwargs`.
     pub default_chat_template_kwargs: HashMap<String, Value>,
 }
 

diff --git a/src/chat/src/event.rs b/src/chat/src/event.rs
@@ -80,7 +80,8 @@ impl [AssistantContentBlock] {
         .filter(|s: &String| !s.is_empty())
     }
 
-    /// Return whether this assistant message contains any non-empty reasoning text blocks.
+    /// Return whether this assistant message contains any non-empty reasoning
+    /// text blocks.
     pub fn has_reasoning(&self) -> bool {
         self.iter().any(|block| match block {
             AssistantContentBlock::Reasoning { text } => !text.is_empty(),
@@ -95,8 +96,7 @@ impl [AssistantContentBlock] {
 
     /// Return whether this assistant message contains any tool-call blocks.
     pub fn has_tool_calls(&self) -> bool {
-        self.iter()
-            .any(|block| matches!(block, AssistantContentBlock::ToolCall(_)))
+        self.iter().any(|block| matches!(block, AssistantContentBlock::ToolCall(_)))
     }
 }
 
@@ -124,7 +124,8 @@ impl AssistantMessage {
 /// Streamed chat event emitted by [`crate::ChatEventStream`].
 #[derive(Debug, Clone, PartialEq)]
 pub enum ChatEvent {
-    /// The request was accepted, streaming has started, and prompt metadata is ready.
+    /// The request was accepted, streaming has started, and prompt metadata is
+    /// ready.
     Start {
         /// The actual prompt token IDs for this request.
         prompt_token_ids: Arc<[u32]>,
@@ -158,14 +159,16 @@ pub enum ChatEvent {
         id: String,
         name: String,
     },
-    /// One incremental tool-call arguments delta for the currently open tool call.
+    /// One incremental tool-call arguments delta for the currently open tool
+    /// call.
     ToolCallArgumentsDelta { index: usize, delta: String },
     /// One tool call has ended.
     ToolCallEnd {
         index: usize,
         call: AssistantToolCall,
     },
-    /// Terminal event carrying the final assembled assistant message and finish metadata.
+    /// Terminal event carrying the final assembled assistant message and finish
+    /// metadata.
     Done {
         message: AssistantMessage,
         /// Number of prompt tokens actually sent to the engine after chat

diff --git a/src/chat/src/lib.rs b/src/chat/src/lib.rs
@@ -1,10 +1,11 @@
 //! Minimal chat facade above [`vllm_text`].
 //!
 //! This crate keeps the northbound boundary intentionally small:
-//! `messages -> rendered prompt -> tokenized prompt -> engine request -> streamed structured
-//! assistant events`. The request side remains text-first, while the response side can emit
-//! structured reasoning and final-answer blocks. It is closer to vLLM's internal chat-rendering
-//! flow than to a full OpenAI-compatible surface.
+//! `messages -> rendered prompt -> tokenized prompt -> engine request ->
+//! streamed structured assistant events`. The request side remains text-first,
+//! while the response side can emit structured reasoning and final-answer
+//! blocks. It is closer to vLLM's internal chat-rendering flow than to a full
+//! OpenAI-compatible surface.
 
 pub use backend::hf::HfChatBackend;
 pub use backend::{
@@ -90,8 +91,9 @@ pub fn validate_parser_overrides(
 
 /// Structured chat facade above [`TextLlm`].
 ///
-/// This layer stays above raw text semantics: it takes care of chat-template rendering, exposes
-/// structured assistant events, and adds chat-specific request semantics such as tool calls.
+/// This layer stays above raw text semantics: it takes care of chat-template
+/// rendering, exposes structured assistant events, and adds chat-specific
+/// request semantics such as tool calls.
 pub struct ChatLlm {
     text: TextLlm,
     backend: DynChatBackend,
@@ -102,7 +104,8 @@ pub struct ChatLlm {
 }
 
 impl ChatLlm {
-    /// Create a new chat facade from a text-generation facade plus a chat backend.
+    /// Create a new chat facade from a text-generation facade plus a chat
+    /// backend.
     pub fn new(text: TextLlm, backend: DynChatBackend) -> Self {
         Self {
             text,
@@ -112,8 +115,8 @@ impl ChatLlm {
         }
     }
 
-    /// Convenience constructor for one shared backend object that implements both text and chat
-    /// responsibilities.
+    /// Convenience constructor for one shared backend object that implements
+    /// both text and chat responsibilities.
     pub fn from_shared_backend(llm: Llm, backend: DynChatTextBackend) -> Self {
         let text = TextLlm::new(llm, backend.clone());
         Self::new(text, backend)
@@ -131,7 +134,8 @@ impl ChatLlm {
         self
     }
 
-    /// Expose the underlying text facade for raw text-generation routes such as `/v1/completions`.
+    /// Expose the underlying text facade for raw text-generation routes such as
+    /// `/v1/completions`.
     pub fn text(&self) -> &TextLlm {
         &self.text
     }
@@ -141,7 +145,8 @@ impl ChatLlm {
         self.text.model_id()
     }
 
-    /// Expose the underlying engine-core client for low-level utility/admin calls.
+    /// Expose the underlying engine-core client for low-level utility/admin
+    /// calls.
     pub fn engine_core_client(&self) -> &EngineCoreClient {
         self.text.engine_core_client()
     }
@@ -171,12 +176,7 @@ impl ChatLlm {
             add_special_tokens: request.add_special_tokens,
             data_parallel_rank: request.data_parallel_rank,
         };
-        let decoded_stream = self
-            .text
-            .generate(text_request)
-            .await?
-            .map_err(Error::from)
-            .boxed();
+        let decoded_stream = self.text.generate(text_request).await?.map_err(Error::from).boxed();
 
         let structured_stream = output_processor.process(decoded_stream)?;
 

diff --git a/src/chat/src/output/default/mod.rs b/src/chat/src/output/default/mod.rs
@@ -28,21 +28,25 @@ trait_set! {
     trait ContentEventStream = Stream<Item = Result<ContentEvent>> + Send + 'static;
 }
 
-/// Default request-scoped output processor used by Hugging Face style chat backends.
+/// Default request-scoped output processor used by Hugging Face style chat
+/// backends.
 ///
-/// This implementation assumes the backend already emitted decoded text deltas, then optionally
-/// layers reasoning parsing and tool-call parsing before assembling final structured chat events.
+/// This implementation assumes the backend already emitted decoded text deltas,
+/// then optionally layers reasoning parsing and tool-call parsing before
+/// assembling final structured chat events.
 pub struct DefaultChatOutputProcessor {
     intermediate: bool,
     reasoning_parser: Option<Box<dyn ReasoningParser>>,
     tool_parser: Option<Box<dyn ToolParser>>,
 }
 
 impl DefaultChatOutputProcessor {
-    /// Build the default output processor and apply any parser-specific request adjustments.
+    /// Build the default output processor and apply any parser-specific request
+    /// adjustments.
     ///
-    /// Parser resolution happens here so that request validation, prompt rendering, and streaming
-    /// all observe the same parser-adjusted request state.
+    /// Parser resolution happens here so that request validation, prompt
+    /// rendering, and streaming all observe the same parser-adjusted
+    /// request state.
     pub fn new(
         request: &mut ChatRequest,
         model_id: &str,
@@ -77,8 +81,9 @@ impl DefaultChatOutputProcessor {
 
     /// Build the plain-text-only default output processor.
     ///
-    /// This keeps the default structured chat-event assembly but disables both reasoning parsing
-    /// and tool-call parsing completely, so that all content is treated as opaque text.
+    /// This keeps the default structured chat-event assembly but disables both
+    /// reasoning parsing and tool-call parsing completely, so that all
+    /// content is treated as opaque text.
     pub fn plain_text_only() -> Self {
         Self {
             intermediate: false,
@@ -106,13 +111,11 @@ impl DefaultChatOutputProcessor {
 
         let parser = factory.create(parser_name, &request.tools)?;
 
-        parser
-            .adjust_request(request)
-            .map_err(|error| Error::ParserInitialization {
-                kind: "tool",
-                name: parser_name.to_string(),
-                error: error.into(),
-            })?;
+        parser.adjust_request(request).map_err(|error| Error::ParserInitialization {
+            kind: "tool",
+            name: parser_name.to_string(),
+            error: error.into(),
+        })?;
 
         TOOL_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using tool parser"));
         Ok(parser)
@@ -138,13 +141,11 @@ impl DefaultChatOutputProcessor {
 
         let parser = factory.create(parser_name, tokenizer)?;
 
-        parser
-            .adjust_request(request)
-            .map_err(|error| Error::ParserInitialization {
-                kind: "reasoning",
-                name: parser_name.to_string(),
-                error: error.into(),
-            })?;
+        parser.adjust_request(request).map_err(|error| Error::ParserInitialization {
+            kind: "reasoning",
+            name: parser_name.to_string(),
+            error: error.into(),
+        })?;
 
         REASONING_PARSER_LOG_ONCE.call_once(|| info!(parser_name, "using reasoning parser"));
         Ok(Some(parser))
@@ -155,8 +156,9 @@ static TOOL_PARSER_LOG_ONCE: Once = Once::new();
 static REASONING_PARSER_LOG_ONCE: Once = Once::new();
 
 impl ChatOutputProcessor for DefaultChatOutputProcessor {
-    /// Transforms a raw generate-output token stream into structured chat events
-    /// through three sequential stages once text decoding has already happened:
+    /// Transforms a raw generate-output token stream into structured chat
+    /// events through three sequential stages once text decoding has
+    /// already happened:
     ///
     /// 1. [`reasoning_event_stream`] — reasoning/content separation
     /// 2. [`tool_event_stream`] — tool-call parsing

diff --git a/src/chat/src/output/default/reasoning.rs b/src/chat/src/output/default/reasoning.rs
@@ -35,7 +35,8 @@ impl ReasoningState {
         }
     }
 
-    /// Convert one decoded text delta into zero or more semantic assistant deltas.
+    /// Convert one decoded text delta into zero or more semantic assistant
+    /// deltas.
     fn process_delta(&mut self, delta: String) -> Vec<ContentEvent> {
         // If the parser has already failed, skip parsing and return plain text deltas.
         if self.parser_failed {

diff --git a/src/chat/src/output/default/tool.rs b/src/chat/src/output/default/tool.rs
@@ -184,9 +184,10 @@ fn push_text_delta(events: &mut Vec<AssistantEvent>, kind: AssistantBlockKind, d
 
 /// Tool parsing when `intermediate=false` (`FinalOnly` mode).
 ///
-/// We keep this separate because some adaptor-backed parsers may not correctly handle the full text
-/// passed to incremental `push` interface, but override `parse_complete()` with a dedicated
-/// one-shot implementation to ensure correctness.
+/// We keep this separate because some adaptor-backed parsers may not correctly
+/// handle the full text passed to incremental `push` interface, but override
+/// `parse_complete()` with a dedicated one-shot implementation to ensure
+/// correctness.
 #[try_stream]
 async fn final_only_tool_event_stream(
     stream: impl ContentEventStream,