fix eval_llama_qnn custom annotation (#15953)

DannyYuyang-quic · web-flow · commit 4d366239f473 · 2025-11-24T11:23:50.000-08:00
### Summary
Fix eval_llama_qnn: retrieve custom annotation from quantization recipe

### Test plan
``` bash
python -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn --decoder_model qwen2_5-0_5b --quant_linear_only --max_seq_length 1024 --ptq 16a4w
```
diff --git a/examples/qualcomm/oss_scripts/llama/eval_llama_qnn.py b/examples/qualcomm/oss_scripts/llama/eval_llama_qnn.py
@@ -316,7 +316,9 @@ def eval_llm(args):
     if args.ptq is not None:
         quant_dtype = getattr(QuantDtype, f"use_{args.ptq}")
         decoder_model_config = SUPPORTED_LLM_MODELS[args.decoder_model]
-        custom_annotations = decoder_model_config.custom_annotation
+        custom_annotations = (
+            decoder_model_config.quant_recipe().recipe.custom_quant_annotations
+        )
 
         quantizer = make_custom_quantizer(
             quant_dtype, args.range_setting, custom_annotations, args.quant_linear_only