Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llm/testing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) PyPTO Contributors.
# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
# CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# -----------------------------------------------------------------------------------------------------------
"""Testing utilities for pypto-lib (accuracy comparison, regression harnesses, ...)."""
59 changes: 59 additions & 0 deletions llm/testing/hf_compare/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) PyPTO Contributors.
# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
# CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# -----------------------------------------------------------------------------------------------------------
"""Hugging Face comparison framework.

This package provides a pluggable harness for verifying that PyPTO kernels /
scopes / end-to-end models produce numerically equivalent outputs to a
reference implementation (typically a Hugging Face module on CPU).

Top-level entry points:

from llm.testing.hf_compare import (
ComparisonCase, register_case, get_case, list_cases,
)

Cases live under ``llm/testing/hf_compare/cases/`` and are registered with
``@register_case("model.scope")``. The CLI in ``__main__.py`` discovers them
by name.
"""
from .base import (
ChainStep,
ChainedComparisonCase,
CompareReport,
ComparisonCase,
InputSpec,
OutputSelector,
ReferenceModel,
SelectorResult,
TargetModel,
TensorSpec,
Tolerance,
WeightAdapter,
get_case,
list_cases,
register_case,
)

__all__ = [
"ChainStep",
"ChainedComparisonCase",
"CompareReport",
"ComparisonCase",
"InputSpec",
"OutputSelector",
"ReferenceModel",
"SelectorResult",
"TargetModel",
"TensorSpec",
"Tolerance",
"WeightAdapter",
"get_case",
"list_cases",
"register_case",
]
66 changes: 66 additions & 0 deletions llm/testing/hf_compare/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright (c) PyPTO Contributors.
# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
# CANN Open Software License Agreement Version 2.0 (the "License").
# Please refer to the License for details. You may not use this file except in compliance with the License.
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
# See LICENSE in the root of the software repository for the full text of the License.
# -----------------------------------------------------------------------------------------------------------
"""CLI for the HF comparison framework.

Usage:
python -m llm.testing.hf_compare list
python -m llm.testing.hf_compare run qwen3_14b.decode \\
--hf-model-path /path/to/Qwen3-14B --platform a2a3 [--cpu-only]
"""
Comment on lines +9 to +15
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Module docstring advertises CLI flags that don't exist.

The example shows --hf-model-path, --platform, and --cpu-only, but the run subparser only registers case, --json, and --kwarg/-k. Anyone copy-pasting this command will get error: unrecognized arguments. The PR test plan correctly uses -k hf_model_path=... -k cpu_only=true, so update the docstring to match.

📝 Suggested docstring fix
 """CLI for the HF comparison framework.
 
 Usage:
     python -m llm.testing.hf_compare list
-    python -m llm.testing.hf_compare run qwen3_14b.decode \\
-        --hf-model-path /path/to/Qwen3-14B --platform a2a3 [--cpu-only]
+    python -m llm.testing.hf_compare run qwen3_14b.decode \\
+        -k hf_model_path=/path/to/Qwen3-14B -k platform=a2a3 [-k cpu_only=true]
 """
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@llm/testing/hf_compare/__main__.py` around lines 9 - 15, The module docstring
at the top of llm/testing/hf_compare/__main__.py advertises CLI flags
(--hf-model-path, --platform, --cpu-only) that the parser doesn't register;
update the docstring to show the actual usage of the implemented CLI (the "list"
and "run" subcommands) and example invocation for "run" using the registered
positional "case", the --json flag and the kwarg option (-k/--kwarg), e.g. show
usages like "python -m llm.testing.hf_compare list" and "python -m
llm.testing.hf_compare run qwen3_14b.decode -k hf_model_path=/path/to/Qwen3-14B
-k cpu_only=true" so copy-paste works and matches how the run subparser (the
parser.add_subparsers / run subparser and its --json and -k/--kwarg definitions)
is actually implemented.

from __future__ import annotations

import argparse
import json
import sys

from .base import get_case, list_cases


def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``hf_compare`` command-line tool.

    Args:
        argv: Argument vector to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` on success (or a passing comparison),
        ``1`` when the comparison report failed, ``2`` for an
        unrecognized subcommand (defensive fallback — ``required=True``
        on the subparsers normally prevents reaching it).
    """
    parser = argparse.ArgumentParser(prog="hf_compare")
    sub = parser.add_subparsers(dest="cmd", required=True)

    sub.add_parser("list", help="List registered cases.")

    run_parser = sub.add_parser("run", help="Run one case.")
    run_parser.add_argument("case", help="Registered case name (see `list`).")
    run_parser.add_argument("--json", action="store_true", help="Print JSON report.")
    run_parser.add_argument(
        "--kwarg", "-k", action="append", default=[],
        help="Extra kwarg for the case factory, e.g. -k hf_model_path=/data/...",
    )

    ns = parser.parse_args(argv)
    # Autodiscovery is lazy: list_cases() / get_case() trigger it internally.

    if ns.cmd == "list":
        # One case name per line; prints nothing when no cases are registered.
        for case_name in list_cases():
            print(case_name)
        return 0

    if ns.cmd == "run":
        # Parse repeated -k key=value pairs into keyword arguments for the
        # case factory; malformed pairs abort via argparse's error path.
        extra: dict[str, str] = {}
        for pair in ns.kwarg:
            key, sep, value = pair.partition("=")
            if not sep:
                parser.error(f"--kwarg must be key=value, got {pair!r}")
            extra[key] = value
        report = get_case(ns.case, **extra).run()
        if ns.json:
            print(json.dumps(report.to_json(), indent=2))
        else:
            print(report.summary())
        # Non-zero exit signals a failed comparison to CI.
        return int(not report.passed)

    return 2


if __name__ == "__main__":
sys.exit(main())
Loading
Loading