-
Notifications
You must be signed in to change notification settings - Fork 73
feat(mlx-grpc): support string stop sequences for chat and completion #1447
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e60ab13
6f39089
15c1227
271cec9
486da46
94eef5a
b938bc9
fac846f
5ddd72c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,8 @@ | |
| use std::collections::HashMap; | ||
|
|
||
| use futures_util::StreamExt; | ||
| use llm_tokenizer::traits::Tokenizer; | ||
| use openai_protocol::common::StringOrArray; | ||
| use smg_grpc_client::{ | ||
| mlx_engine::AbortOnDropStream as MlxStream, | ||
| mlx_proto::{self as mlx}, | ||
|
|
@@ -17,6 +19,8 @@ use smg_grpc_client::{ | |
| vllm_proto::{self as vllm, generate_complete::MatchedStop as VllmMatchedStop}, | ||
| }; | ||
|
|
||
| use crate::routers::grpc::utils::resolve_mlx_matched_stop_json; | ||
|
|
||
| // ===================== | ||
| // Multimodal Data | ||
| // ===================== | ||
|
|
@@ -738,6 +742,14 @@ impl ProtoGenerateComplete { | |
| matches!(self, Self::Mlx(_)) | ||
| } | ||
|
|
||
| /// Return the raw matched stop token ID for MLX responses; None for all other backends. | ||
| fn mlx_matched_stop_token_id(&self) -> Option<u32> { | ||
| match self { | ||
| Self::Mlx(c) => c.matched_stop_token_id, | ||
| _ => None, | ||
| } | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
|
|
||
| /// Get token IDs from either backend (output_ids in proto) | ||
| pub fn token_ids(&self) -> &[u32] { | ||
| match self { | ||
|
|
@@ -795,6 +807,10 @@ impl ProtoGenerateComplete { | |
| /// - MatchedTokenId → Number | ||
| /// - MatchedStopStr → String | ||
| /// - None → None | ||
| #[expect( | ||
| clippy::unreachable, | ||
| reason = "MLX must use matched_stop_json_with_context" | ||
| )] | ||
| pub fn matched_stop_json(&self) -> Option<serde_json::Value> { | ||
| macro_rules! convert { | ||
| ($oneof:expr, $token_id:path, $stop_str:path) => { | ||
|
|
@@ -820,9 +836,31 @@ impl ProtoGenerateComplete { | |
| TrtllmMatchedStop::MatchedTokenId, | ||
| TrtllmMatchedStop::MatchedStopStr | ||
| ), | ||
| Self::Mlx(c) => c | ||
| .matched_stop_token_id | ||
| .map(|id| serde_json::Value::Number(id.into())), | ||
| // MLX requires request context to resolve the token ID; use matched_stop_json_with_context. | ||
| Self::Mlx(_) => unreachable!("matched_stop_json called for MLX backend"), | ||
|
zach-li-sudo marked this conversation as resolved.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Changing Useful? React with 👍 / 👎. |
||
| } | ||
| } | ||
|
|
||
| /// Resolve the matched stop for any backend, using request context for MLX. | ||
| /// | ||
| /// MLX only stores a token ID; this maps it back to the user-facing string or integer | ||
| /// (see `chat_utils::resolve_mlx_matched_stop_json`). All other backends return | ||
| /// `matched_stop_json()` directly. | ||
| pub fn matched_stop_json_with_context( | ||
| &self, | ||
| stop: Option<&StringOrArray>, | ||
| stop_token_ids: Option<&Vec<u32>>, | ||
| tokenizer: &dyn Tokenizer, | ||
| ) -> Option<serde_json::Value> { | ||
| if self.is_mlx() { | ||
| resolve_mlx_matched_stop_json( | ||
| self.mlx_matched_stop_token_id(), | ||
| stop, | ||
| stop_token_ids, | ||
| tokenizer, | ||
| ) | ||
| } else { | ||
| self.matched_stop_json() | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the comment to mention all three pipelines that still reject string stops.
The comment states that "Messages and Generate pipelines still reject string stops," but the Responses pipeline (line 422) also retains the
`reject_stop_strings` check. For completeness, the comment should list all three.
📝 Proposed fix to make the documentation complete
📝 Committable suggestion
🤖 Prompt for AI Agents