@@ -26,7 +26,6 @@
 from guidellm.benchmark.benchmarker import Benchmarker
 from guidellm.benchmark.objects import GenerativeBenchmark, GenerativeBenchmarksReport
 from guidellm.benchmark.output import (
-    GenerativeBenchmarkerConsole,
     GenerativeBenchmarkerOutput,
 )
 from guidellm.benchmark.profile import Profile, ProfileType
@@ -53,6 +52,97 @@
 _CURRENT_WORKING_DIR = Path.cwd()


+# Data types
+
+DataType = (
+    Iterable[str]
+    | Iterable[dict[str, Any]]
+    | Dataset
+    | DatasetDict
+    | IterableDataset
+    | IterableDatasetDict
+    | str
+    | Path
+)
+
+OutputFormatType = (
+    tuple[str, ...]
+    | list[str]
+    | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
+    | None
+)
+
+
+# Helper functions
+
+async def initialize_backend(
+    backend: BackendType | Backend,
+    target: str,
+    model: str | None,
+    backend_kwargs: dict[str, Any] | None,
+) -> Backend:
+    backend = (
+        Backend.create(
+            backend, target=target, model=model, **(backend_kwargs or {})
+        )
+        if not isinstance(backend, Backend)
+        else backend
+    )
+    await backend.process_startup()
+    await backend.validate()
+    return backend
+
+
+async def resolve_profile(
+    constraint_inputs: dict[str, int | float],
+    profile: Profile | str | None,
+    rate: list[float] | None,
+    random_seed: int,
+    constraints: dict[str, ConstraintInitializer | Any],
+):
+    for key, val in constraint_inputs.items():
+        if val is not None:
+            constraints[key] = val
+    if not isinstance(profile, Profile):
+        if isinstance(profile, str):
+            profile = Profile.create(
+                rate_type=profile,
+                rate=rate,
+                random_seed=random_seed,
+                constraints={**constraints},
+            )
+        else:
+            raise ValueError(f"Expected string for profile; got {type(profile)}")
+
+    elif constraints:
+        raise ValueError(
+            "Constraints must be empty when providing a Profile instance. "
+            f"Provided constraints: {constraints}; provided profile: {profile}"
+        )
+    return profile
+
+
+async def resolve_output_formats(
+    output_formats: OutputFormatType,
+    output_path: str | Path | None,
+) -> dict[str, GenerativeBenchmarkerOutput]:
+    output_formats = GenerativeBenchmarkerOutput.resolve(
+        output_formats=(output_formats or {}), output_path=output_path
+    )
+    return output_formats
+
+
+async def finalize_outputs(
+    report: GenerativeBenchmarksReport,
+    resolved_output_formats: dict[str, GenerativeBenchmarkerOutput]
+):
+    output_format_results = {}
+    for key, output in resolved_output_formats.items():
+        output_result = await output.finalize(report)
+        output_format_results[key] = output_result
+    return output_format_results
+
+
+# Complete entrypoints
+
 async def benchmark_with_scenario(scenario: Scenario, **kwargs):
     """
     Run a benchmark using a scenario and specify any extra arguments
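
A rough sketch of how the new helpers are meant to compose, assuming they sit in the same module as above; the backend type, target URL, rate values, and constraint values are placeholders, and the backend startup only succeeds against a reachable server:

from pathlib import Path
from typing import Any

async def _example_helper_flow():
    # Sketch only: placeholder backend/target values; assumes the helpers defined above.
    backend = await initialize_backend(
        "openai_http", "http://localhost:8000", model=None, backend_kwargs=None
    )
    constraints: dict[str, Any] = {}
    profile = await resolve_profile(
        {"max_seconds": 60, "max_requests": 1000},  # only non-None values become constraints
        "constant",  # a rate-type string is expanded into a Profile
        [10.0],
        42,
        constraints,
    )
    outputs = await resolve_output_formats(("json", "csv"), Path.cwd())
    # ...run the benchmarks and build a GenerativeBenchmarksReport `report`, then:
    # results = await finalize_outputs(report, outputs)
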
@@ -67,16 +157,7 @@ async def benchmark_with_scenario(scenario: Scenario, **kwargs):
 # @validate_call(config={"arbitrary_types_allowed": True})
 async def benchmark_generative_text(  # noqa: C901
     target: str,
-    data: (
-        Iterable[str]
-        | Iterable[dict[str, Any]]
-        | Dataset
-        | DatasetDict
-        | IterableDataset
-        | IterableDatasetDict
-        | str
-        | Path
-    ),
+    data: DataType,
     profile: StrategyType | ProfileType | Profile,
     rate: float | list[float] | None = None,
     random_seed: int = 42,
@@ -91,12 +172,7 @@ async def benchmark_generative_text( # noqa: C901
     data_sampler: Literal["random"] | None = None,
     # Output configuration
     output_path: str | Path | None = _CURRENT_WORKING_DIR,
-    output_formats: (
-        tuple[str, ...]
-        | list[str]
-        | dict[str, str | dict[str, Any] | GenerativeBenchmarkerOutput]
-        | None
-    ) = ("console", "json", "html", "csv"),
+    output_formats: OutputFormatType = ("console", "json", "html", "csv"),
     # Updates configuration
     progress: tuple[str, ...] | list[str] | list[BenchmarkerProgress] | None = None,
     print_updates: bool = False,
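
With the parameter unions pulled out into DataType and OutputFormatType, call sites keep the same shape. A hedged sketch of a call against the simplified signature; the target, data file, and output settings are placeholders, and every other keyword argument is assumed to keep its default:

from pathlib import Path

async def _example_benchmark_call():
    # Sketch only: placeholder values for target/data/output.
    report, results = await benchmark_generative_text(
        target="http://localhost:8000",
        data="prompts.jsonl",                # any DataType value
        profile="constant",
        rate=[5.0],
        output_path=Path("./benchmarks"),
        output_formats=("console", "json"),  # any OutputFormatType value
    )
    return report, results
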
@@ -120,16 +196,7 @@ async def benchmark_generative_text( # noqa: C901
     with console.print_update_step(
         title=f"Initializing backend {backend}"
     ) as console_step:
-        backend = (
-            Backend.create(
-                backend, target=target, model=model, **(backend_kwargs or {})
-            )
-            if not isinstance(backend, Backend)
-            else backend
-        )
-        console_step.update(f"{backend.__class__.__name__} backend initialized")
-        await backend.process_startup()
-        await backend.validate()
+        backend = await initialize_backend(backend, target, model, backend_kwargs)
         console_step.finish(
             title=f"{backend.__class__.__name__} backend initialized",
             details=backend.info,
@@ -190,27 +257,19 @@ async def benchmark_generative_text( # noqa: C901
     with console.print_update_step(
         title=f"Resolving profile {profile}"
     ) as console_step:
-        for key, val in {
-            "max_seconds": max_seconds,
-            "max_requests": max_requests,
-            "max_errors": max_errors,
-            "max_error_rate": max_error_rate,
-            "max_global_error_rate": max_global_error_rate,
-        }.items():
-            if val is not None:
-                constraints[key] = val
-        if not isinstance(profile, Profile):
-            profile = Profile.create(
-                rate_type=profile,
-                rate=rate,
-                random_seed=random_seed,
-                constraints={**constraints},
-            )
-        elif constraints:
-            raise ValueError(
-                "Constraints must be empty when providing a Profile instance. "
-                f"Provided constraints: {constraints}; provided profile: {profile}"
-            )
+        profile = await resolve_profile(
+            {
+                "max_seconds": max_seconds,
+                "max_requests": max_requests,
+                "max_errors": max_errors,
+                "max_error_rate": max_error_rate,
+                "max_global_error_rate": max_global_error_rate,
+            },
+            profile,
+            rate,
+            random_seed,
+            constraints,
+        )
         console_step.finish(
             title=f"{profile.__class__.__name__} profile resolved",
             details=InfoMixin.extract_from_obj(profile),
@@ -237,12 +296,10 @@ async def benchmark_generative_text( # noqa: C901
         )

     with console.print_update_step(title="Resolving output formats") as console_step:
-        output_formats = GenerativeBenchmarkerOutput.resolve(
-            output_formats=(output_formats or {}), output_path=output_path
-        )
+        resolved_output_formats = await resolve_output_formats(output_formats, output_path)
         console_step.finish(
             title="Output formats resolved",
-            details={key: str(val) for key, val in output_formats.items()},
+            details={key: str(val) for key, val in resolved_output_formats.items()},
             status_level="success",
         )

@@ -278,14 +335,11 @@ async def benchmark_generative_text( # noqa: C901
         if benchmark:
             report.benchmarks.append(benchmark)

-    output_format_results = {}
-    for key, output in output_formats.items():
-        output_result = await output.finalize(report)
-        output_format_results[key] = output_result
+    output_format_results = await finalize_outputs(report, resolved_output_formats)

     console.print("\n\n")
     console.print_update(
-        title=f"Benchmarking complete, generated {len(report.benchmarks)} benchmark(s)",
+        title=f"Benchmarking complete; generated {len(report.benchmarks)} benchmark(s)",
         status="success",
     )
     for key, value in output_format_results.items():
@@ -294,20 +348,34 @@ async def benchmark_generative_text( # noqa: C901
     return report, output_format_results


-def reimport_benchmarks_report(file: Path, output_path: Path | None) -> None:
+async def reimport_benchmarks_report(
+    file: Path,
+    output_path: Path | None,
+    output_formats: OutputFormatType = ("console", "json", "html", "csv"),
+) -> tuple[GenerativeBenchmarksReport, dict[str, Any]]:
     """
     The command-line entry point for re-importing and displaying an
-    existing benchmarks report. Can also specify
+    existing benchmarks report. Can also specify an output format.
     Assumes the file provided exists.
     """
-    report = GenerativeBenchmarksReport.load_file(file)
-    console_output = GenerativeBenchmarkerConsole()
-    console_output.finalize(report)
     console = Console()
+    with console.print_update_step(
+        title=f"Loading benchmarks from {file}"
+    ) as console_step:
+        report = GenerativeBenchmarksReport.load_file(file)
+        console_step.finish(f"Import of old benchmarks complete; loaded {len(report.benchmarks)} benchmark(s)")
+
+    with console.print_update_step(title="Resolving output formats") as console_step:
+        resolved_output_formats = await resolve_output_formats(output_formats, output_path)
+        console_step.finish(
+            title="Output formats resolved",
+            details={key: str(val) for key, val in resolved_output_formats.items()},
+            status_level="success",
+        )

-    if output_path:
-        with console.print_update_step(
-            title=f"Saving benchmarks report to {output_path}..."
-        ) as console_step:
-            saved_path = report.save_file(output_path)
-            console_step.finish(title=f"Benchmarks report saved to {saved_path}")
+    output_format_results = await finalize_outputs(report, resolved_output_formats)
+
+    for key, value in output_format_results.items():
+        console.print_update(title=f"  {key:<8}: {value}", status="debug")
+
+    return report, output_format_results
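
Since reimport_benchmarks_report is now a coroutine that returns the loaded report plus the per-format output results, callers need to drive it with an event loop. A minimal sketch, assuming the function is imported from this module; the paths are placeholders:

import asyncio
from pathlib import Path

# Sketch only: placeholder paths; the CLI layer is assumed to wrap this call.
report, results = asyncio.run(
    reimport_benchmarks_report(
        file=Path("benchmarks.json"),
        output_path=Path("./reexported"),
        output_formats=("console", "json"),
    )
)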