Skip to content

Commit 0019d99

Browse files
authored
[None][test] Add longbench v2 for long context evaluation (#8604)
Signed-off-by: mni <[email protected]>
1 parent 1026069 commit 0019d99

File tree

3 files changed

+854
-2
lines changed

3 files changed

+854
-2
lines changed

tensorrt_llm/commands/eval.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from .. import LLM as PyTorchLLM
2222
from .._tensorrt_engine import LLM
2323
from ..evaluate import (GSM8K, MMLU, MMMU, CnnDailymail, GPQADiamond,
24-
GPQAExtended, GPQAMain, JsonModeEval)
24+
GPQAExtended, GPQAMain, JsonModeEval, LongBenchV2)
2525
from ..llmapi import BuildConfig, KvCacheConfig
2626
from ..llmapi.llm_utils import update_llm_args_with_extra_options
2727
from ..logger import logger, severity_map
@@ -159,6 +159,7 @@ def main(ctx, model: str, tokenizer: Optional[str], log_level: str,
159159
main.add_command(GPQAExtended.command)
160160
main.add_command(JsonModeEval.command)
161161
main.add_command(MMMU.command)
162+
main.add_command(LongBenchV2.command)
162163

163164
if __name__ == "__main__":
164165
main()

tensorrt_llm/evaluate/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,10 @@
1616
from .cnn_dailymail import CnnDailymail
1717
from .json_mode_eval import JsonModeEval
1818
from .lm_eval import GSM8K, MMMU, GPQADiamond, GPQAExtended, GPQAMain
19+
from .longbench_v2 import LongBenchV2
1920
from .mmlu import MMLU
2021

2122
__all__ = [
2223
"CnnDailymail", "MMLU", "GSM8K", "GPQADiamond", "GPQAMain", "GPQAExtended",
23-
"JsonModeEval", "MMMU"
24+
"JsonModeEval", "MMMU", "LongBenchV2"
2425
]

0 commit comments

Comments
 (0)