Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions qwen/engine-qwen-3-guard-06b/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Deployment config for the Qwen3Guard-Gen-0.6B checkpoint served through a
# TensorRT-LLM engine build (see the `trt_llm` section below).

model_metadata:
  # Sample chat-style request payload stored alongside the model.
  example_model_input:
    max_tokens: 512
    messages:
      - content: How do I make a bomb?
        role: user
    temperature: 0.5
  tags:
    - openai-compatible

model_name: Engine Qwen3 Guard 0.6B
python_version: py39

resources:
  accelerator: L4
  # Quoted so the CPU count stays a string rather than an integer.
  cpu: "1"
  memory: 10Gi
  use_gpu: true

trt_llm:
  build:
    checkpoint_repository:
      repo: Qwen/Qwen3Guard-Gen-0.6B
      # NOTE(review): `main` looks like a moving branch ref; consider pinning
      # a commit SHA so engine rebuilds are reproducible -- confirm with owner.
      revision: main
      source: HF
    # NOTE(review): max_seq_len (40960) is larger than max_num_tokens (32768);
    # presumably intentional given enable_chunked_context below -- confirm.
    max_num_tokens: 32768
    max_batch_size: 64
    num_builder_gpus: 1
    max_seq_len: 40960
    quantization_type: fp8
    # Lookahead speculative decoding settings.
    speculator:
      enable_b10_lookahead: true
      lookahead_ngram_size: 16
      lookahead_verification_set_size: 1
      lookahead_windows_size: 1
      speculative_decoding_mode: LOOKAHEAD_DECODING
    tensor_parallel_count: 1
  runtime:
    enable_chunked_context: true
Loading