@@ -3,12 +3,20 @@
 from pathlib import Path
 import subprocess
 from dataclasses import dataclass
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Dict
 from enum import Enum, auto
 
+from sharktank.utils.hf_datasets import Dataset, RemoteFile, get_dataset
+
 logger = logging.getLogger(__name__)
 
 
+class AccuracyValidationException(RuntimeError):
+    """Exception raised when accuracy validation fails."""
+
+    pass
+
+
 class ModelSource(Enum):
     HUGGINGFACE = auto()
     LOCAL = auto()
@@ -34,13 +42,17 @@ class ModelConfig:
     batch_sizes: Tuple[int, ...]
     device_settings: "DeviceSettings"
     source: ModelSource
+    dataset_name: Optional[str] = None  # Name of the dataset in hf_datasets.py
     repo_id: Optional[str] = None
     local_path: Optional[Path] = None
     azure_config: Optional[AzureConfig] = None
 
     def __post_init__(self):
-        if self.source == ModelSource.HUGGINGFACE and not self.repo_id:
-            raise ValueError("repo_id required for HuggingFace models")
+        if self.source == ModelSource.HUGGINGFACE:
+            if not (self.dataset_name or self.repo_id):
+                raise ValueError(
+                    "Either dataset_name or repo_id required for HuggingFace models"
+                )
         elif self.source == ModelSource.LOCAL and not self.local_path:
             raise ValueError("local_path required for local models")
         elif self.source == ModelSource.AZURE and not self.azure_config:
@@ -70,6 +82,8 @@ def __init__(self, base_dir: Path, config: ModelConfig):
     def _get_model_dir(self) -> Path:
         """Creates and returns appropriate model directory based on source."""
         if self.config.source == ModelSource.HUGGINGFACE:
+            if self.config.dataset_name:
+                return self.base_dir / self.config.dataset_name.replace("/", "_")
             return self.base_dir / self.config.repo_id.replace("/", "_")
         elif self.config.source == ModelSource.LOCAL:
             return self.base_dir / "local" / self.config.local_path.stem
@@ -82,15 +96,36 @@ def _get_model_dir(self) -> Path:
             raise ValueError(f"Unsupported model source: {self.config.source}")
 
     def _download_from_huggingface(self) -> Path:
-        """Downloads model from HuggingFace."""
+        """Downloads model from HuggingFace using hf_datasets.py."""
         model_path = self.model_dir / self.config.model_file
         if not model_path.exists():
-            logger.info(f"Downloading model {self.config.repo_id} from HuggingFace")
-            subprocess.run(
-                f"huggingface-cli download --local-dir {self.model_dir} {self.config.repo_id} {self.config.model_file}",
-                shell=True,
-                check=True,
-            )
+            if self.config.dataset_name:
+                logger.info(
+                    f"Downloading model {self.config.dataset_name} using hf_datasets"
+                )
+                dataset = get_dataset(self.config.dataset_name)
+                downloaded_files = dataset.download(local_dir=self.model_dir)
+
+                # Find the model file in downloaded files
+                for file_id, paths in downloaded_files.items():
+                    for path in paths:
+                        if path.name == self.config.model_file:
+                            return path
+
+                raise ValueError(
+                    f"Model file {self.config.model_file} not found in dataset {self.config.dataset_name}"
+                )
+            else:
+                logger.info(f"Downloading model {self.config.repo_id} from HuggingFace")
+                # Create a temporary dataset for direct repo downloads
+                remote_file = RemoteFile(
+                    file_id="model",
+                    repo_id=self.config.repo_id,
+                    filename=self.config.model_file,
+                )
+                downloaded_paths = remote_file.download(local_dir=self.model_dir)
+                return downloaded_paths[0]
+
         return model_path
 
     def _copy_from_local(self) -> Path:
@@ -132,14 +167,30 @@ def _download_from_azure(self) -> Path:
         return model_path
 
     def prepare_tokenizer(self) -> Path:
-        """Downloads and prepares tokenizer."""
+        """Downloads and prepares tokenizer using hf_datasets.py when possible."""
         tokenizer_path = self.model_dir / "tokenizer.json"
+
         if not tokenizer_path.exists():
-            logger.info(f"Downloading tokenizer {self.config.tokenizer_id}")
+            # First try to get tokenizer from dataset if available
+            if self.config.dataset_name:
+                dataset = get_dataset(self.config.dataset_name)
+                downloaded_files = dataset.download(local_dir=self.model_dir)
+
+                # Look for tokenizer files in downloaded files
+                for file_id, paths in downloaded_files.items():
+                    for path in paths:
+                        if path.name == "tokenizer.json":
+                            return path
+
+            # Fall back to downloading from transformers if not found in dataset
+            logger.info(
+                f"Downloading tokenizer {self.config.tokenizer_id} using transformers"
+            )
             from transformers import AutoTokenizer
 
             tokenizer = AutoTokenizer.from_pretrained(self.config.tokenizer_id)
             tokenizer.save_pretrained(self.model_dir)
+
         return tokenizer_path
 
     def export_model(self, weights_path: Path) -> Tuple[Path, Path]:
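
For reference, a minimal sketch of how the new hf_datasets-backed paths above are driven. The dataset name, repo id, and filename below are hypothetical placeholders; real dataset names are whatever is registered in sharktank/utils/hf_datasets.py. The return shapes (a dict of file_id to local paths for Dataset.download, a list of paths for RemoteFile.download) are inferred from how this diff consumes them.

    from pathlib import Path

    from sharktank.utils.hf_datasets import RemoteFile, get_dataset

    # Dataset-based path: "open_llama_3b_v2_f16_gguf" is a hypothetical name and
    # must match an entry registered in hf_datasets.py.
    downloaded = get_dataset("open_llama_3b_v2_f16_gguf").download(
        local_dir=Path("/tmp/models")
    )
    for file_id, paths in downloaded.items():
        # download() maps each file_id to the list of local paths it produced,
        # which is why the diff scans the values for the configured model_file.
        print(file_id, [p.name for p in paths])

    # repo_id fallback: fetch a single file directly, as _download_from_huggingface
    # does when no dataset_name is configured. Repo and filename are illustrative.
    remote = RemoteFile(
        file_id="model",
        repo_id="openlm-research/open_llama_3b",
        filename="model.safetensors",
    )
    paths = remote.download(local_dir=Path("/tmp/models"))

Wrapping single-file downloads in a RemoteFile keeps both code paths returning local Path objects, so callers never need to know which mechanism fetched the weights.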