
Commit dfbb0a0

fix test

committed · 1 parent e1767b2

1 file changed (+3 −3 lines)

tests/quantization/torchao_integration/test_torchao.py (+3 −3)
@@ -269,13 +269,13 @@ def test_autoquant(self):
         )
         tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         input_ids = tokenizer(self.input_text, return_tensors="pt").to(torch_device)
-        output = quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
+        output = quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens, cache_implementation="static", disable_compile=True)
         quantized_model.finalize_autoquant()

         check_autoquantized(self, quantized_model.model.layers[0].self_attn.v_proj)

-        EXPECTED_OUTPUT = "What are we having for dinner?\n\nJessica: (smiling)"
-        output = quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens)
+        EXPECTED_OUTPUT = 'What are we having for dinner?\n\n10. "Dinner is ready'
+        output = quantized_model.generate(**input_ids, max_new_tokens=self.max_new_tokens, cache_implementation="static", disable_compile=True)
         self.assertEqual(tokenizer.decode(output[0], skip_special_tokens=True), EXPECTED_OUTPUT)

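For context: the commit adds cache_implementation="static" and disable_compile=True to both generate calls in test_autoquant and updates EXPECTED_OUTPUT to the text produced under that decoding setup. The sketch below shows roughly how the updated call sits in an autoquant flow; it is a minimal illustration only, and the model name, prompt, token budget, and TorchAoConfig("autoquant") construction are assumptions rather than details taken from this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig

# Assumptions for illustration: the model name, prompt, and max_new_tokens value are not from the commit.
model_name = "facebook/opt-125m"
quantization_config = TorchAoConfig("autoquant")  # assumption: autoquant mode, as the test name suggests

quantized_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_ids = tokenizer("What are we having for dinner?", return_tensors="pt").to(quantized_model.device)

# The two kwargs added by this commit: a static KV cache and generation with compilation disabled.
output = quantized_model.generate(
    **input_ids,
    max_new_tokens=20,
    cache_implementation="static",
    disable_compile=True,
)
quantized_model.finalize_autoquant()
print(tokenizer.decode(output[0], skip_special_tokens=True))

The test then decodes output[0] with skip_special_tokens=True and compares the string against EXPECTED_OUTPUT, which is why the expected text changes alongside the new generate kwargs.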
