From 2f660645eb1e086963eb58b69c10b75fd4480937 Mon Sep 17 00:00:00 2001 From: Siddartha Pothapragada Date: Wed, 6 May 2026 09:26:53 -0700 Subject: [PATCH] Fix C++ -Werror regressions in llama runner (#19326) Summary: Fixes 3 `-Werror` diagnostics that broke the qualcomm llama runner build on `cfg:android-arm64-clang19-no-san` and disabled the following test infra targets: - `xplat/executorch/examples/qualcomm/oss_scripts/llama:runner_lib` - `xplat/executorch/examples/qualcomm/oss_scripts/llama:runner_lib_static` - `xplat/executorch/examples/qualcomm/oss_scripts/llama:qnn_llama_runner` - `xplat/executorch/examples/qualcomm/oss_scripts/llama:qnn_llama_runner_static` Three diagnostics fixed: 1. `-Wreorder-ctor` in `runner.cpp`: `attention_sink_rope_module_` is declared as the 2nd field of `Runner` (right after `module_`) but the constructor initializer list appended it last, after `tokenizer_`. Moved it to the correct position in the init list to match declaration order. Recent regression introduced in the attention-sink diff (#16574). 2. `-Woverloaded-virtual` in `lhd_token_generator.h` and `multimodal_lhd_token_generator.h`: the derived classes define a `prepare_io(std::vector<uint64_t>, std::vector<int64_t>)` overload that hides the base class virtual `prepare_io(uint64_t, int64_t)`. Added a `using TokenGenerator::prepare_io;` (and equivalent for the multimodal hierarchy) declaration so the base virtual stays in scope and the warning is silenced without changing behavior. Latent bug surfaced by the clang19 toolchain bump. 3. `-Wdelete-non-abstract-non-virtual-dtor` in `prompt_processor.h`: `PromptProcessor` has virtual member functions but no virtual destructor, so deleting via `std::unique_ptr<PromptProcessor>` in `Runner` was undefined behavior under strict warnings. Added `virtual ~PromptProcessor() = default;` mirroring the pattern already used in `TokenGenerator` (`token_generator.h`). Also transitively fixes `MultimodalPromptProcessor`. 
Reviewed By: rascani Differential Revision: D103991803 --- .../qualcomm/oss_scripts/llama/runner/lhd_token_generator.h | 3 +++ .../runner/multimodal_runner/multimodal_lhd_token_generator.h | 3 +++ examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h | 2 ++ examples/qualcomm/oss_scripts/llama/runner/runner.cpp | 4 ++-- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h index e97f64b7c1d..796dde88014 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h +++ b/examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.h @@ -102,6 +102,9 @@ class LhdTokenGenerator : public TokenGenerator { AttentionSinkRopeRunner* attention_sink_rope_runner) override; private: + // Bring base class's virtual prepare_io into scope so the overload below + // does not hide it (-Woverloaded-virtual). + using TokenGenerator::prepare_io; /** * @brief Fill in I/O buffers with prompt token and position. * @param cur_token Current token. diff --git a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h index 83da9e7a6ba..7494afec6da 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h +++ b/examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.h @@ -108,6 +108,9 @@ class MultimodalLhdTokenGenerator AttentionSinkRopeRunner* attention_sink_rope_runner) override; private: + // Bring base class's virtual prepare_io into scope so the overload below + // does not hide it (-Woverloaded-virtual). + using TokenGenerator::prepare_io; /** * @brief Fill in I/O buffers with prompt token and position. * @param cur_token Current token. 
diff --git a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h index 0790985d231..599f7050d83 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h +++ b/examples/qualcomm/oss_scripts/llama/runner/prompt_processor.h @@ -40,6 +40,8 @@ class PromptProcessor { const std::string& method_name, Metadata metadata); + virtual ~PromptProcessor() = default; + /** * @brief Initialize I/O tensor and allocate I/O data buffer. * @param buffer_manager Pointer to IMemAlloc instance; by default, it uses a diff --git a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp index 0e9b7860dbd..0a4a8b9abb5 100644 --- a/examples/qualcomm/oss_scripts/llama/runner/runner.cpp +++ b/examples/qualcomm/oss_scripts/llama/runner/runner.cpp @@ -102,6 +102,7 @@ Runner::Runner( std::unique_ptr tokenizer, std::unique_ptr attention_sink_rope_module) : module_(std::move(module)), + attention_sink_rope_module_(std::move(attention_sink_rope_module)), ngram_(ngram), window_(window), gcap_(gcap), @@ -111,8 +112,7 @@ Runner::Runner( temperature_(temperature), eval_mode_(static_cast(eval_mode)), shared_buffer_(shared_buffer), - tokenizer_(std::move(tokenizer)), - attention_sink_rope_module_(std::move(attention_sink_rope_module)) { + tokenizer_(std::move(tokenizer)) { stats_.reset(); if (decoder_model_version == "llama2") {