diff --git a/.github/actions/upload-benchmark-results/action.yml b/.github/actions/upload-benchmark-results/action.yml
index 8f822c5075..bcc7c1b17f 100644
--- a/.github/actions/upload-benchmark-results/action.yml
+++ b/.github/actions/upload-benchmark-results/action.yml
@@ -19,7 +19,7 @@ runs:
       shell: bash
       run: |
         set -eux
-        python3 -mpip install boto3==1.35.33
+        python3 -mpip install boto3==1.35.33 psutil==7.0.0 pynvml==12.0.0
 
     - name: Check that GITHUB_TOKEN is defined
       if: ${{ inputs.schema-version != 'v2' }}
@@ -72,8 +72,7 @@ runs:
       run: |
         set -eux
 
-        # TODO (huydhn): Implement this part
-        echo "runners=[]" >> "${GITHUB_OUTPUT}"
+        python3 "${GITHUB_ACTION_PATH}/../../scripts/benchmarks/gather_runners_info.py"
 
     - name: Gather the dependencies information
       id: gather-dependencies
diff --git a/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json b/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json
deleted file mode 100644
index 3285abbbea..0000000000
--- a/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json
+++ /dev/null
@@ -1 +0,0 @@
-[{"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 286.98526, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 405.055521, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 306.53265, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 308.598385, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 522.4928639999999, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 243.51297, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 280.957917, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 1366.6017709999999, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 90.70632, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 289.104427, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "generate_time(ms)", "actual": 733.888385, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223108, "name": "llama2", "dtype": "q8", "metric": "token_per_sec", "actual": 174.28572, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}]
\ No newline at end of file
diff --git a/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json b/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json
deleted file mode 100644
index 3d3fe21c2d..0000000000
--- a/.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json
+++ /dev/null
@@ -1 +0,0 @@
-[{"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 23.0566825, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 18.705938, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 42.8922188, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 14.722292, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 5G", "arch": "Android 13", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 10.9387811, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 13.23677, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22 Ultra 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22 Ultra 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "avg_inference_latency(ms)", "actual": 63.4430574, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "model_load_time(ms)", "actual": 16.301875, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}, {"repo": "pytorch/executorch", "head_branch": "main", "workflow_id": 12345, "run_attempt": 1, "job_id": 31017223431, "name": "mv2 xnnpack", "dtype": "q8", "metric": "load_status", "actual": 0.0, "target": 0.0, "device": "Samsung Galaxy S22+ 5G", "arch": "Android 12", "filename": "android-perf", "test_name": "ANDROID_APP", "runner": "Samsung Galaxy S22+ 5G"}]
\ No newline at end of file
diff --git a/.github/scripts/benchmark-results-dir-for-testing/v3/do_not_overwrite_runners_info.json b/.github/scripts/benchmark-results-dir-for-testing/v3/do_not_overwrite_runners_info.json
new file mode 100644
index 0000000000..8c23615541
--- /dev/null
+++ b/.github/scripts/benchmark-results-dir-for-testing/v3/do_not_overwrite_runners_info.json
@@ -0,0 +1 @@
+{"benchmark": {"name": "ExecuTorch", "mode": "inference", "extra_info": {"app_type": "IOS_APP", "benchmark_config": "{\"model\": \"edsr\", \"config\": \"xnnpack_q8\", \"device_name\": \"apple_iphone_15\", \"device_arn\": \"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d\"}"}}, "model": {"name": "edsr", "type": "OSS model", "backend": "xnnpack_q8"}, "metric": {"name": "peak_inference_mem_usage(mb)", "benchmark_values": [333.2014794921875], "target_value": 0, "extra_info": {"method": "forward"}}, "runners": [{"name": "Apple iPhone 15", "type": "iOS 18.0", "avail_mem_in_gb": 0, "total_mem_in_gb": 0}]}
diff --git a/.github/scripts/benchmarks/gather_runners_info.py b/.github/scripts/benchmarks/gather_runners_info.py
new file mode 100755
index 0000000000..3695cb3cd8
--- /dev/null
+++ b/.github/scripts/benchmarks/gather_runners_info.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import logging
+import os
+import platform
+import socket
+from logging import info
+from typing import Any, Dict
+
+import psutil
+
+
+logging.basicConfig(level=logging.INFO)
+
+
+def set_output(name: str, val: Any) -> None:
+    if os.getenv("GITHUB_OUTPUT"):
+        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
+            print(f"{name}={val}", file=env)
+    else:
+        print(f"::set-output name={name}::{val}")
+
+
+def get_runner_info() -> Dict[str, Any]:
+    device_name = ""
+    device_type = ""
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            # TODO (huydhn): only support CUDA and ROCm for now
+            if torch.version.hip:
+                device_name = "rocm"
+            elif torch.version.cuda:
+                device_name = "cuda"
+
+            device_type = torch.cuda.get_device_name()
+
+    except ImportError:
+        info("Fail to import torch to get the device name")
+
+    runner_info = {
+        "cpu_info": platform.processor(),
+        "cpu_count": psutil.cpu_count(),
+        "avail_mem_in_gb": int(psutil.virtual_memory().total / (1024 * 1024 * 1024)),
+        "extra_info": {
+            "hostname": socket.gethostname(),
+        },
+    }
+
+    # TODO (huydhn): only support CUDA and ROCm for now
+    if device_name and device_type:
+        runner_info["name"] = device_name
+        runner_info["type"] = device_type
+        runner_info["gpu_count"] = torch.cuda.device_count()
+        runner_info["avail_gpu_mem_in_gb"] = int(
+            torch.cuda.get_device_properties(0).total_memory
+            * torch.cuda.device_count()
+            / (1024 * 1024 * 1024)
+        )
+
+    return runner_info
+
+
+def main() -> None:
+    runner_info = get_runner_info()
+    set_output("runners", json.dumps([runner_info]))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/test_upload_benchmark_results.yml b/.github/workflows/test_upload_benchmark_results.yml
index 15be9b16d7..f223f18d38 100644
--- a/.github/workflows/test_upload_benchmark_results.yml
+++ b/.github/workflows/test_upload_benchmark_results.yml
@@ -13,13 +13,6 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Test upload the benchmark results (v2)
-        uses: ./.github/actions/upload-benchmark-results
-        with:
-          benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing/v2
-          schema-version: v2
-          dry-run: true
-
       - name: Test upload the benchmark results (v3)
         uses: ./.github/actions/upload-benchmark-results
         with: