Skip to content

Commit 6fea888

Browse files
fynn-su and dsikka authored
Fix Qwen3Next import errors on old transformers versions (#2030)
SUMMARY: Currently, we support `transformers>=4.54.0`, however `Qwen3Next` was only added in `transformers==4.57.0`. This updates the code so that: 1. In `src/llmcompressor/modeling/qwen3_next_moe.py` we only used the imports for typing, so they've been moved into an `if TYPE_CHECKING` block. 2. In `tests/llmcompressor/modeling/test_calib_qwen3_next.py` we wrap the import statement in a try-except and skip the Qwen3Next test if they fail to import. TEST PLAN: Run CI. --------- Signed-off-by: Fynn Schmitt-Ulms <[email protected]> Co-authored-by: Dipika Sikka <[email protected]>
1 parent 81eff05 commit 6fea888

File tree

2 files changed

+24
-8
lines changed

2 files changed

+24
-8
lines changed

src/llmcompressor/modeling/qwen3_next_moe.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
# coding=utf-8
24
# Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
35
# All rights reserved.
@@ -13,19 +15,21 @@
1315
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1416
# See the License for the specific language governing permissions and
1517
# limitations under the License.
18+
from typing import TYPE_CHECKING
1619

1720
import torch
1821

1922
from llmcompressor.modeling.moe_context import MoECalibrationModule
2023

21-
22-
@MoECalibrationModule.register("Qwen3NextSparseMoeBlock")
23-
class CalibrationQwen3NextSparseMoeBlock(MoECalibrationModule):
24+
if TYPE_CHECKING:
2425
from transformers import Qwen3NextConfig
2526
from transformers.models.qwen3_next.modeling_qwen3_next import (
2627
Qwen3NextSparseMoeBlock,
2728
)
2829

30+
31+
@MoECalibrationModule.register("Qwen3NextSparseMoeBlock")
32+
class CalibrationQwen3NextSparseMoeBlock(MoECalibrationModule):
2933
"""
3034
Calibration version of Qwen3NextSparseMoeBlock that sends all tokens to all experts.
3135
"""

tests/llmcompressor/modeling/test_calib_qwen3_next.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,22 @@
1111
from llmcompressor.utils.helpers import DisableQuantization, calibration_forward_context
1212
from tests.testing_utils import requires_cadence, requires_gpu
1313

14+
try:
15+
from transformers import Qwen3NextConfig
16+
from transformers.models.qwen3_next.modeling_qwen3_next import (
17+
Qwen3NextSparseMoeBlock,
18+
)
19+
except ImportError:
20+
Qwen3NextConfig = None
21+
Qwen3NextSparseMoeBlock = None
22+
1423

1524
@requires_cadence("weekly")
1625
@pytest.mark.parametrize("model_stub", ["Qwen/Qwen3-Next-80B-A3B-Instruct"])
26+
@pytest.mark.skipif(
27+
Qwen3NextConfig is None,
28+
reason="Qwen3Next not available in this version of transformers",
29+
)
1730
def test_calib_replace_qwen3moe_all_experts(model_stub):
1831
with skip_weights_download():
1932
model = AutoModelForCausalLM.from_pretrained(model_stub)
@@ -60,12 +73,11 @@ def hook_fn(i, module, input, output):
6073

6174

6275
@requires_gpu
76+
@pytest.mark.skipif(
77+
Qwen3NextConfig is None,
78+
reason="Qwen3Next not available in this version of transformers",
79+
)
6380
def test_calib_qwen3_moe_module():
64-
from transformers import Qwen3NextConfig
65-
from transformers.models.qwen3_next.modeling_qwen3_next import (
66-
Qwen3NextSparseMoeBlock,
67-
)
68-
6981
config = Qwen3NextConfig()
7082
with torch.device("cuda"):
7183
original = Qwen3NextSparseMoeBlock(config).eval()

0 commit comments

Comments
 (0)