Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 75 additions & 25 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ anyhow = "1.0.100"
arc-swap = "1.9.0"
async-openai = "0.33.1"
async-trait = "0.1.89"
asynk-strim-attr = "0.1.0"
axum = "0.8.8"
base64 = "0.22.1"
byteorder = "1.5.0"
Expand All @@ -31,7 +32,6 @@ enum-as-inner = "0.7.0"
expect-test = "1.5.1"
fastokens = { git = "https://github.com/BugenZhao/fastokens.git", rev = "12a865a1f13aaae8f7b14bab1f177bba30577ad7" }
futures = "0.3.31"
futures-async-stream = "0.2.13"
hex = "0.4.3"
hf-hub = { version = "0.5.0", features = ["tokio"] }
http-body = "1.0.1"
Expand Down
2 changes: 1 addition & 1 deletion src/chat/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ test-util = []

[dependencies]
anyhow.workspace = true
asynk-strim-attr.workspace = true
easy-ext.workspace = true
futures.workspace = true
futures-async-stream.workspace = true
minijinja.workspace = true
minijinja-contrib.workspace = true
openai-harmony.workspace = true
Expand Down
1 change: 0 additions & 1 deletion src/chat/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#![feature(coroutines)]
#![feature(trait_alias)]

//! Minimal chat facade above [`vllm_text`].
Expand Down
30 changes: 18 additions & 12 deletions src/chat/src/output/default/reasoning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
//! separation: `decoded.rs` still only produces plain text deltas, while later
//! stages consume the semantic `Text` / `Reasoning` split emitted here.

use asynk_strim_attr::{TryYielder, try_stream};
use futures::{StreamExt as _, pin_mut};
use futures_async_stream::try_stream;
use thiserror_ext::AsReport;
use tracing::warn;
use vllm_text::output::DecodedTextEvent;

use super::ContentEvent;
use crate::Result;
use crate::error::Error;
use crate::event::AssistantBlockKind;
use crate::output::DecodedTextEventStream;
Expand Down Expand Up @@ -122,18 +123,19 @@ fn push_reasoning_delta(events: &mut Vec<ContentEvent>, delta: ReasoningDelta) {
}

/// Wrap one decoded-text stream into the internal reasoning event stream.
#[try_stream(ok = ContentEvent, error = Error)]
#[try_stream]
pub(crate) async fn reasoning_event_stream(
decoded_stream: impl DecodedTextEventStream,
reasoning_parser: Option<Box<dyn ReasoningParser>>,
) {
mut y: TryYielder<ContentEvent, Error>,
) -> Result<()> {
pin_mut!(decoded_stream);

// Without a parser, pass through as plain text deltas.
let Some(reasoning_parser) = reasoning_parser else {
while let Some(event) = decoded_stream.next().await.transpose()? {
for next in ContentEvent::from_decoded_plain_text(event) {
yield next;
y.yield_ok(next).await;
}
}
return Ok(());
Expand All @@ -148,10 +150,11 @@ pub(crate) async fn reasoning_event_stream(
prompt_logprobs,
} => {
state.initialize(&prompt_token_ids);
yield ContentEvent::Start {
y.yield_ok(ContentEvent::Start {
prompt_token_ids,
prompt_logprobs,
}
})
.await;
}
DecodedTextEvent::TextDelta {
delta,
Expand All @@ -160,28 +163,31 @@ pub(crate) async fn reasoning_event_stream(
finished,
} => {
for next in state.process_delta(delta) {
yield next;
y.yield_ok(next).await;
}
if logprobs.is_some() || !token_ids.is_empty() {
yield ContentEvent::LogprobsDelta {
y.yield_ok(ContentEvent::LogprobsDelta {
logprobs,
token_ids,
};
})
.await;
}
if let Some(finished) = finished {
for next in state.finish() {
yield next;
y.yield_ok(next).await;
}
yield ContentEvent::Done {
y.yield_ok(ContentEvent::Done {
prompt_token_count: finished.prompt_token_count,
output_token_count: finished.output_token_count,
finish_reason: finished.finish_reason,
kv_transfer_params: finished.kv_transfer_params,
};
})
.await;
}
}
}
}
Ok(())
}

#[cfg(test)]
Expand Down
Loading
Loading