Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions src/chat/src/renderer/deepseek_v32/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@ enum ThinkingMode {
}

/// Tool schema shape rendered inside the `<functions>` block.
///
/// Every field borrows from the caller for lifetime `'a`; nothing is owned
/// by this struct.
// `skip_serializing_none` attaches `skip_serializing_if = "Option::is_none"`
// to each `Option` field, so the fields need no attribute of their own.
#[serde_with::skip_serializing_none]
#[derive(Debug, Serialize)]
struct RenderedToolSchema<'a> {
    /// Name of the tool.
    name: &'a str,
    /// Optional description; left out of the serialized output when `None`.
    description: Option<&'a str>,
    /// Borrowed `Value` holding the tool's parameters.
    parameters: &'a Value,
    /// Optional `strict` flag; left out of the serialized output when `None`.
    strict: Option<bool>,
}

Expand Down
3 changes: 1 addition & 2 deletions src/chat/src/renderer/deepseek_v4/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,12 @@ enum ThinkingMode {
Thinking,
}

/// Serializable, borrowed view of a tool schema: a name, an optional
/// description, the parameters value, and an optional `strict` flag.
// `skip_serializing_none` attaches `skip_serializing_if = "Option::is_none"`
// to each `Option` field, so the fields need no attribute of their own.
#[serde_with::skip_serializing_none]
#[derive(Debug, Serialize)]
struct RenderedToolSchema<'a> {
    /// Name of the tool.
    name: &'a str,
    /// Optional description; left out of the serialized output when `None`.
    description: Option<&'a str>,
    /// Borrowed `Value` holding the tool's parameters.
    parameters: &'a Value,
    /// Optional `strict` flag; left out of the serialized output when `None`.
    strict: Option<bool>,
}

Expand Down
1 change: 1 addition & 0 deletions src/engine-core-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ serde_default.workspace = true
serde_json.workspace = true
serde_repr.workspace = true
serde_tuple.workspace = true
serde_with.workspace = true
task-local.workspace = true
tempfile = { workspace = true, optional = true }
thiserror.workspace = true
Expand Down
32 changes: 12 additions & 20 deletions src/engine-core-client/src/protocol/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,34 +152,29 @@ pub enum StopReason {
///
/// Original Python definition:
/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L36-L107>
// Serialization: `skip_serializing_none` omits every `Option` field that is
// `None`; the two `bool` flags are omitted while `false` through `is_false`.
// Deserialization: the container-level `#[serde(default)]` fills any missing
// field from `Default`, so no per-field `default` is required.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct StructuredOutputsParams {
    /// JSON schema (as a dict/object or JSON string) constraining the output.
    pub json: Option<serde_json::Value>,
    /// Regular expression the output must match.
    pub regex: Option<String>,
    /// List of allowed output strings (the model must produce one of these).
    pub choice: Option<Vec<String>>,
    /// Context-free grammar (in EBNF-like notation) the output must conform to.
    pub grammar: Option<String>,
    /// When `true`, output must be valid JSON (free-form, no schema).
    pub json_object: Option<bool>,
    /// Disable any additional whitespace in guided JSON output.
    #[serde(skip_serializing_if = "crate::protocol::is_false")]
    pub disable_any_whitespace: bool,
    /// Disable `additionalProperties` in JSON schema output.
    #[serde(skip_serializing_if = "crate::protocol::is_false")]
    pub disable_additional_properties: bool,
    /// Custom whitespace pattern for guided JSON output.
    pub whitespace_pattern: Option<String>,
    /// Structural tag configuration (JSON-encoded string).
    pub structural_tag: Option<String>,
}

Expand All @@ -192,6 +187,7 @@ pub struct StructuredOutputsParams {
///
/// Original Python definition:
/// <https://github.com/vllm-project/vllm/blob/f22d6e026798a74e6542a52ef776c054f2de572a/vllm/sampling_params.py#L155-L291>
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EngineCoreSamplingParams {
/// Controls randomness. Lower values are more deterministic; zero means
Expand Down Expand Up @@ -240,33 +236,29 @@ pub struct EngineCoreSamplingParams {
pub all_stop_token_ids: BTreeSet<u32>,
/// Logit biases to apply during sampling.
/// Keys are token IDs
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub logit_bias: Option<HashMap<u32, f32>>,
/// Restrict output to these token IDs only.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub allowed_token_ids: Option<Vec<u32>>,
/// Tokenized bad words to avoid during generation.
#[serde(
default,
skip_serializing_if = "Option::is_none",
rename = "_bad_words_token_ids"
)]
#[serde(default, rename = "_bad_words_token_ids")]
pub bad_words_token_ids: Option<Vec<Vec<u32>>>,
/// Parameters for configuring structured outputs (guided decoding).
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub structured_outputs: Option<StructuredOutputsParams>,
/// Specific token IDs for which log probabilities should be returned at each position.
///
/// When set, the engine returns logprobs for exactly these tokens in addition to the
/// sampled/scored token. Mutually exclusive with the `logprobs` count field in practice.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub logprob_token_ids: Option<Vec<u32>>,
/// If `Some(true)`, the request will not attempt to read from the prefix cache; newly
/// computed blocks may still populate the cache. `None` defers to engine-core defaults.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub skip_reading_prefix_cache: Option<bool>,
/// Additional request parameters for custom extensions (from `vllm_xargs`).
#[serde(default, skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub extra_args: Option<HashMap<String, serde_json::Value>>,
}

Expand Down
5 changes: 3 additions & 2 deletions src/server/src/routes/openai/utils/structured_outputs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@ use crate::error::ApiError;
/// JSON schema specification nested inside a `json_schema` response format.
///
/// Mirrors the Python vLLM `JsonSchemaResponseFormat` class.
// `skip_serializing_none` omits `description` and `strict` from the output
// when they are `None`; each keeps one `#[serde(default)]` so a missing key
// deserializes to `None`.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct JsonSchemaFormat {
    /// Name of the schema (required).
    pub name: String,
    /// Optional description of the schema.
    #[serde(default)]
    pub description: Option<String>,
    /// The actual JSON schema object.
    ///
    /// Also accepted under the key `json_schema` when deserializing.
    #[serde(alias = "json_schema")]
    pub schema: Value,
    /// Optional `strict` flag.
    #[serde(default)]
    pub strict: Option<bool>,
}

Expand Down
2 changes: 1 addition & 1 deletion src/server/src/routes/openai/utils/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ pub enum ContentPart {
VideoUrl { video_url: VideoUrl },
}

/// Image reference consisting of a URL and an optional `detail` string.
// `skip_serializing_none` omits `detail` from the serialized output when it
// is `None`, so the field needs no `skip_serializing_if` of its own.
#[serde_with::skip_serializing_none]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
pub struct ImageUrl {
    /// URL string for the image.
    pub url: String,
    /// Optional `detail` value; absent from the output when `None`.
    pub detail: Option<String>,
}

Expand Down
Loading