5 changes: 0 additions & 5 deletions pyproject.toml
@@ -16,11 +16,6 @@ known-first-party = ["llmcompressor"]

[tool.pytest.ini_options]
markers = [
"smoke: quick tests to check basic functionality",
"sanity: tests to ensure that new changes do not break existing functionality",
"regression: detailed tests to ensure major functions work correctly",
"integration: tests which integrate with a third party service such as HF",
"unit: tests to ensure code correctness and regression test functionality",
"example: tests for content in the 'examples' folder",
"multi_gpu: tests that require multiple GPUs",
]
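For context, markers registered under `[tool.pytest.ini_options]` are attached to tests with `@pytest.mark.<name>` and selected or deselected from the command line with pytest's `-m` option (for example `pytest -m smoke` or `pytest -m "not integration"`). A minimal sketch of that usage, not part of this change and with a hypothetical test name:

import pytest

@pytest.mark.smoke  # marker registered in pyproject.toml; selectable via `pytest -m smoke`
def test_addition_smoke():
    assert 1 + 1 == 2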
File renamed without changes.
@@ -1,55 +1,45 @@
import pytest

from llmcompressor.core import Event, EventType


@pytest.mark.smoke
def test_event_epoch_based():
event = Event(steps_per_epoch=10)
assert event.epoch_based is True


@pytest.mark.smoke
def test_event_epoch():
event = Event(steps_per_epoch=10, global_step=25)
assert event.epoch == 2


@pytest.mark.smoke
def test_event_epoch_full():
event = Event(steps_per_epoch=10, global_step=25)
assert event.epoch_full == 2.5


@pytest.mark.smoke
def test_event_epoch_step():
event = Event(steps_per_epoch=10, global_step=25)
assert event.epoch_step == 5


@pytest.mark.smoke
def test_event_epoch_batch():
event = Event(
steps_per_epoch=10, global_step=25, batches_per_step=2, global_batch=50
)
assert event.epoch_batch == 10


@pytest.mark.smoke
def test_event_current_index():
event = Event(steps_per_epoch=10, global_step=25)
assert event.current_index == 2.5


@pytest.mark.smoke
def test_event_should_update():
event = Event(steps_per_epoch=10, global_step=25)
assert event.should_update(start=0, end=30, update=2.5) is True
assert event.should_update(start=0, end=20, update=5) is False
assert event.should_update(start=0, end=30, update=0) is True


@pytest.mark.smoke
def test_event_new_instance():
event = Event(type_=EventType.INITIALIZE, global_step=25)
new_event = event.new_instance(global_step=30)
@@ -1,10 +1,7 @@
import pytest

from llmcompressor.core.state import Data, Hardware, ModifiedState, State
from llmcompressor.metrics import BaseLogger, LoggerManager


@pytest.mark.smoke
def test_state_initialization():
state = State()
assert state.model is None
@@ -20,7 +17,6 @@ def test_state_initialization():
assert state._last_log_step is None


@pytest.mark.smoke
def test_modified_state_initialization():
mod_state = ModifiedState(
model="model",
@@ -34,7 +30,6 @@ def test_modified_state_initialization():
assert mod_state.modifier_data == [{"key": "value"}]


@pytest.mark.smoke
def test_state_update():
state = State()
updated_data = {
@@ -63,7 +58,6 @@ def test_state_update():
assert state.model_log_cadence == 2


@pytest.mark.regression
def test_state_sparsification_ready():
state = State()
assert not state.compression_ready
@@ -73,7 +67,6 @@ def test_state_sparsification_ready():
assert state.compression_ready


@pytest.mark.regression
def test_state_update_loggers():
state = State()
logger1 = BaseLogger("test1", False)
File renamed without changes.
@@ -97,7 +97,6 @@ def setup_model_and_config(request, tmpdir_factory):


@requires_gpu
@pytest.mark.integration
def test_quantization_reload(setup_model_and_config):
model, config, output_dir = setup_model_and_config

@@ -130,7 +129,6 @@ def test_quantization_reload(setup_model_and_config):


@requires_gpu
@pytest.mark.integration
@torch.no_grad()
def test_perplexity(setup_model_and_config):
model, config, output_dir = setup_model_and_config
29 changes: 14 additions & 15 deletions tests/llmcompressor/transformers/compression/test_recipe_parsing.py
@@ -52,28 +52,27 @@ def recipe_variants():
]

yaml_string = """
DEFAULT_stage:
DEFAULT_modifiers:
SmoothQuantModifier:
smoothing_strength: 0.8
mappings:
- - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
- re:.*input_layernorm
- - ['re:.*gate_proj', 're:.*up_proj']
- re:.*post_attention_layernorm
GPTQModifier:
targets: Linear
ignore: [lm_head]
scheme: W8A8
"""
DEFAULT_stage:
DEFAULT_modifiers:
SmoothQuantModifier:
smoothing_strength: 0.8
mappings:
- - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
- re:.*input_layernorm
- - ['re:.*gate_proj', 're:.*up_proj']
- re:.*post_attention_layernorm
GPTQModifier:
targets: Linear
ignore: [lm_head]
scheme: W8A8
"""

yaml_path = str(Path(__file__).parent / "recipes" / "smoothquant_gptq_w8a8.yaml")

return [python_modifiers, yaml_string, yaml_path]


@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize(
"recipe",
recipe_variants(),
5 changes: 0 additions & 5 deletions tests/llmcompressor/transformers/oneshot/test_api_inputs.py
@@ -7,9 +7,6 @@

CONFIGS_DIRECTORY = "tests/llmcompressor/transformers/oneshot/oneshot_configs"

# TODO: Seems better to mark test type (smoke, sanity, regression) as a marker as
# opposed to using a field in the config file?


@pytest.fixture(params=parse_params(CONFIGS_DIRECTORY))
def one_shot_args(request):
@@ -45,8 +42,6 @@ def wrapped_preprocess_func(sample):
return args


@pytest.mark.smoke
@pytest.mark.integration
def test_one_shot_inputs(one_shot_args, tmp_path):
oneshot(
**one_shot_args,
@@ -21,7 +21,6 @@
)


@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY))
def test_consecutive_runs_small(config, tmp_path):
_test_consecutive_runs(
@@ -35,7 +34,6 @@ def test_consecutive_runs_small(config, tmp_path):


@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(GPU_CONFIGS_DIRECTORY))
def test_consecutive_runs_gpu(config, tmp_path):
assert not is_model_ct_quantized_from_path(
@@ -10,7 +10,6 @@


@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY))
def test_oneshot_with_modifier_object(tmp_path, config):
output_dir = tmp_path / "oneshot_out"
@@ -43,7 +43,6 @@ def labeled_dataloader(dataset_name, model_name, num_samples):


@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY))
def test_oneshot_completion(config, tmp_path):
_test_oneshot_completion(
@@ -58,7 +57,6 @@ def test_oneshot_completion(config, tmp_path):


@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(GPU_CONFIGS_DIRECTORY))
def test_oneshot_completion_gpu(config, tmp_path):
model = AutoModelForCausalLM.from_pretrained(
@@ -1,11 +1,9 @@
import pytest
from accelerate import init_empty_weights
from transformers import AutoModelForCausalLM

from llmcompressor.modifiers.pruning.sparsegpt import SparseGPTModifier


@pytest.mark.integration
def test_infer_targets():
modifier = SparseGPTModifier(sparsity=0.0)
with init_empty_weights():
@@ -26,7 +26,6 @@ def dataloader():
return dataloader


@pytest.mark.integration
@pytest.mark.parametrize("extra_targets,expected", [([], 0), (["lm_head"], 1)])
def test_lm_head(extra_targets, expected, model, dataloader):
kwargs = {
@@ -9,7 +9,6 @@
from llmcompressor.utils.pytorch.module import get_layers


@pytest.mark.integration
def test_infer_owl_layer_sparsity():
target_sparsity = 0.7
vocab_size = 512
@@ -24,7 +24,6 @@ def _clear_cuda_cache():
torch.cuda.empty_cache()


@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(CONFIGS_DIRECTORY))
def test_sparsities(tmp_path, config):
model = oneshot(
@@ -43,9 +42,7 @@ def test_sparsities(tmp_path, config):
assert math.isclose(layer_2_dense.item(), 0.0, rel_tol=1e-4)


# TODO: @Satrat and @dsikka, revisit if we want these nightly or weekly
@requires_gpu
@pytest.mark.integration
@pytest.mark.parametrize("config", parse_params(GPU_CONFIGS_DIRECTORY))
def test_sparsities_gpu(tmp_path, config, _clear_cuda_cache):
model = AutoModelForCausalLM.from_pretrained(
Empty file removed tests/unit/core/events/__init__.py