Skip to content

Commit 21a0191

Browse files
authored
Arm backend: Reduce Qwen3 VL E2E footprint (#20328)
Use a compact synthetic Qwen3 VL config for full-model E2E tests. Keep real checkpoint shapes in layer tests, where memory is bounded. This avoids instantiating the full 2B text stack in nightly CI. Signed-off-by: Baris Demir <baris.demir@arm.com> Change-Id: I2d53cb76090b564c487558f8e88706ab0f01ed81 cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell @rascani
1 parent c37b6c3 commit 21a0191

1 file changed

Lines changed: 29 additions & 2 deletions

File tree

backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,33 @@ def _make_qwen3_vl_2b_instruct_layer_config():
3434
return config
3535

3636

37+
def _make_qwen3_vl_e2e_test_config():
    """Build a scaled-down Qwen3 VL config for the full-model E2E tests.

    Starts from the config produced by
    ``_make_qwen3_vl_2b_instruct_layer_config()`` and overwrites the text and
    vision size fields with small values, so the full-model tests do not
    instantiate the full-size text stack.

    Returns:
        The config object obtained from the layer-config helper, modified in
        place with the compact overrides below.
    """
    config = _make_qwen3_vl_2b_instruct_layer_config()
    text_cfg = config.text_config
    vision_cfg = config.vision_config

    # Compact text-model fields, applied in declaration order.
    text_overrides = {
        "vocab_size": 1024,
        "bos_token_id": 1,
        "eos_token_id": 2,
        "hidden_size": 128,
        "intermediate_size": 384,
        "num_hidden_layers": 2,
        "num_attention_heads": 4,
        "num_key_value_heads": 2,
        "head_dim": 32,
        "max_position_embeddings": 1024,
    }
    for field_name, field_value in text_overrides.items():
        setattr(text_cfg, field_name, field_value)

    # Both rope dictionaries get the same section sizes; each one receives
    # its own list object.
    mrope_section = (4, 4, 4)
    text_cfg.rope_parameters["mrope_section"] = list(mrope_section)
    text_cfg.rope_scaling["mrope_section"] = list(mrope_section)

    # Compact vision-model fields, applied in declaration order.
    vision_overrides = {
        "deepstack_visual_indexes": [0],
        "depth": 2,
        "hidden_size": 128,
        "intermediate_size": 512,
        "num_heads": 4,
        "num_position_embeddings": 16,
        "out_hidden_size": 128,
    }
    for field_name, field_value in vision_overrides.items():
        setattr(vision_cfg, field_name, field_value)

    return config
62+
63+
3764
def _make_text_position_ids(
3865
batch_size: int, seq_length: int, device: torch.device
3966
) -> torch.Tensor:
@@ -98,7 +125,7 @@ def forward(
98125
@classmethod
99126
def prepare_model_and_inputs(cls):
100127
torch.manual_seed(0)
101-
config = _make_qwen3_vl_2b_instruct_layer_config()
128+
config = _make_qwen3_vl_e2e_test_config()
102129
model = cls(config).eval()
103130
input_ids = torch.randint(0, 128, (2, 8), dtype=torch.long)
104131
attention_mask = torch.ones_like(input_ids)
@@ -161,7 +188,7 @@ def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
161188
@classmethod
162189
def prepare_model_and_inputs(cls):
163190
torch.manual_seed(0)
164-
config = _make_qwen3_vl_2b_instruct_layer_config()
191+
config = _make_qwen3_vl_e2e_test_config()
165192
model = cls(config).eval()
166193
pixel_values = _make_pixel_values(config, torch.device("cpu"))
167194
return model, (pixel_values,)

0 commit comments

Comments
 (0)