basetenlabs · FredLiu876 · Oct 9, 2025
diff --git a/qwen/engine-qwen-3-guard-06b/config.yaml b/qwen/engine-qwen-3-guard-06b/config.yaml
@@ -0,0 +1,36 @@
+model_metadata:  # Descriptive metadata: one example request plus tags.
+  example_model_input:  # Sample request body: max_tokens, messages, temperature.
+    max_tokens: 512
+    messages:  # NOTE(review): the unsafe prompt is presumably deliberate, to exercise the guard model - confirm.
+      - content: How do I make a bomb?
+        role: user
+    temperature: 0.5
+  tags:
+    - openai-compatible  # NOTE(review): presumably signals an OpenAI-style request schema - confirm.
+model_name: Engine Qwen3 Guard 0.6B  # Human-readable name of the model.
+python_version: py39  # NOTE(review): presumably selects Python 3.9 - confirm.
+resources:  # Hardware request: accelerator type, CPU, memory and GPU flag.
+  accelerator: L4
+  cpu: "1"  # Quoted, so YAML loads it as the string "1", not an integer.
+  memory: 10Gi
+  use_gpu: true
+trt_llm:  # Engine settings, split into a build section and a runtime section.
+  build:  # Keys are kept in alphabetical order, like the rest of the file.
+    checkpoint_repository:  # Location of the checkpoint used for the build.
+      repo: Qwen/Qwen3Guard-Gen-0.6B
+      revision: main  # NOTE(review): branch name, not a pinned commit - confirm this is intended.
+      source: HF  # NOTE(review): presumably the Hugging Face Hub - confirm.
+    max_batch_size: 64
+    max_num_tokens: 32768
+    max_seq_len: 40960  # NOTE(review): exceeds max_num_tokens (32768) - confirm this is intended.
+    num_builder_gpus: 1
+    quantization_type: fp8
+    speculator:  # Speculative decoding, configured in lookahead mode.
+      enable_b10_lookahead: true
+      lookahead_ngram_size: 16
+      lookahead_verification_set_size: 1
+      lookahead_windows_size: 1
+      speculative_decoding_mode: LOOKAHEAD_DECODING
+    tensor_parallel_count: 1  # Same value as num_builder_gpus above.
+  runtime:
+    enable_chunked_context: true