From 75e1d3b60c9b79761fb281b6405e2c07c9ddacb4 Mon Sep 17 00:00:00 2001 From: akolliasAMD Date: Tue, 23 Jun 2026 17:16:26 -0600 Subject: [PATCH 1/3] added a check for ais capable volumes --- projects/hipfile/tools/ais-check/ais-check | 292 +++++++++++++++++- .../tools/ais-check/tests/test_main.py | 21 +- .../tools/ais-check/tests/test_volumes.py | 86 ++++++ 3 files changed, 393 insertions(+), 6 deletions(-) create mode 100644 projects/hipfile/tools/ais-check/tests/test_volumes.py diff --git a/projects/hipfile/tools/ais-check/ais-check b/projects/hipfile/tools/ais-check/ais-check index 50e87202c87..2f56ea81b15 100755 --- a/projects/hipfile/tools/ais-check/ais-check +++ b/projects/hipfile/tools/ais-check/ais-check @@ -7,9 +7,25 @@ """ Check if the necessary AMD Infinity Storage (AIS) components are installed +and whether at least one mounted volume can use hipFile's fast path. -If all necessary components are installed the program exits with exit code 0. -If components are missing the program exits with a non-zero exit code. +If all required components are installed and at least one volume is +hipFile-capable, the program exits with exit code 0. Otherwise it exits with a +non-zero exit code. + +A volume is reported as hipFile-capable when it meets the same fast-path +eligibility rules the hipFile library enforces for a regular file: + + * filesystem is xfs, or ext4 mounted with data=ordered (the ext4 default), and + * direct I/O (O_DIRECT) opens succeed on the filesystem. + +See src/amd_detail/mountinfo.cpp (fstype/journaling classification) and +src/amd_detail/backend/fastpath.cpp (the accept rule) for the authoritative +logic the volume scan reproduces. + +The BACKING column (nvme, lvm, md, ...) is informational only: a local NVMe +(non-multipath) device is required by amdgpu/kfd, but that constraint is +enforced by the driver, not by the library's fast-path scoring. 
""" # pylint: enable=invalid-name @@ -17,12 +33,40 @@ If components are missing the program exits with a non-zero exit code. import argparse import ctypes import ctypes.util +import errno import glob import gzip import os import sys +# Mounts backed by these (or any non-block source) can never use the fast path. +PSEUDO_FSTYPES = { + "autofs", + "bpf", + "cgroup", + "cgroup2", + "configfs", + "debugfs", + "devpts", + "devtmpfs", + "efivarfs", + "fuse.gvfsd-fuse", + "fusectl", + "hugetlbfs", + "mqueue", + "overlay", + "proc", + "pstore", + "ramfs", + "securityfs", + "squashfs", + "sysfs", + "tmpfs", + "tracefs", +} + + def kernel_supports_p2pdma(): """ Check for P2P DMA support in the kernel @@ -234,11 +278,240 @@ def amdgpu_supports_ais(): return False +class Mount: # pylint: disable=too-few-public-methods + """A single parsed /proc/self/mountinfo entry.""" + + def __init__(self, devno, mountpoint, fstype, source, options): + self.devno = devno # "maj:min" + self.mountpoint = mountpoint + self.fstype = fstype + self.source = source + self.options = options # combined mount + super options, comma-joined + + def option(self, name): + """Return the value of mount option `name`, or None if unset/valueless.""" + for opt in self.options.split(","): + if opt == name: + return "" + if opt.startswith(name + "="): + return opt.split("=", 1)[1] + return None + + +def unescape(field): + """Decode the octal escapes (\\040 etc.) mountinfo uses for spaces/tabs.""" + out = [] + i = 0 + while i < len(field): + if field[i] == "\\" and field[i + 1 : i + 4].isdigit(): + out.append(chr(int(field[i + 1 : i + 4], 8))) + i += 4 + else: + out.append(field[i]) + i += 1 + return "".join(out) + + +def parse_mountinfo(): + """Parse /proc/self/mountinfo into a list of Mount objects.""" + mounts = [] + with open("/proc/self/mountinfo", "r", encoding="utf-8") as f: + for line in f: + fields = line.split() + # Fields up to the "-" separator are fixed; optional fields vary. 
+ try: + sep = fields.index("-") + except ValueError: + continue + devno = fields[2] + mountpoint = unescape(fields[4]) + mount_opts = fields[5] + fstype = fields[sep + 1] + source = unescape(fields[sep + 2]) + super_opts = fields[sep + 3] if len(fields) > sep + 3 else "" + options = ",".join(o for o in (mount_opts, super_opts) if o) + mounts.append(Mount(devno, mountpoint, fstype, source, options)) + return mounts + + +def fs_supported(mount): + """ + Mirror mountinfo.cpp: True if the filesystem type/journaling qualifies. + + ext4 qualifies only with data=ordered (the default when no data= option is + present); xfs always qualifies. + """ + if mount.fstype == "xfs": + return True + if mount.fstype == "ext4": + data = mount.option("data") + # Absent data= option means ordered (ext4 default), which qualifies. + return data is None or data == "ordered" + return False + + +def fstype_label(mount): + """ + Render the filesystem type, folding ext4's journal mode in parentheses. + + ext4 always journals, so we surface the data= mode (defaulting to the + "ordered" ext4 default) as e.g. "ext4 (ordered)". Other filesystems are + shown bare since the journal mode either doesn't apply or isn't expressed + as a data= option. + """ + if mount.fstype == "ext4": + return f"ext4 ({mount.option('data') or 'ordered'})" + return mount.fstype + + +def probe_odirect(mountpoint): + """ + Determine O_DIRECT support by opening a temp file with O_DIRECT. + + Returns True (supported), False (rejected with EINVAL), or None when it + could not be verified (e.g. read-only or no write permission). 
+ """ + o_direct = getattr(os, "O_DIRECT", 0) + if o_direct == 0: + return None # platform has no O_DIRECT to test + + flags = os.O_RDWR | os.O_CREAT | os.O_EXCL | os.O_CLOEXEC | o_direct + path = os.path.join(mountpoint, f".ais-odirect-{os.getpid()}-{os.urandom(4).hex()}") + fd = None + try: + fd = os.open(path, flags, 0o600) + return True + except OSError as e: + if e.errno == errno.EINVAL: + return False + # EROFS / EACCES / EPERM / ENOSPC etc. -> can't tell + return None + finally: + if fd is not None: + os.close(fd) + try: + os.unlink(path) + except OSError: + pass + + +def backing_storage(devno): + """ + Resolve a maj:min to (backing_type, disk_name) via sysfs. + + `backing_type` is a single short label describing what the filesystem + resides on: device-mapper targets are classified from their dm/uuid + (lvm, mpath, crypt, dm), software RAID as md, and raw block devices from + their kernel name (nvme, virtio, scsi, loop). Returns (None, None) when + sysfs cannot be read. + """ + try: + real = os.path.realpath(f"/sys/dev/block/{devno}") + except OSError: + return None, None + name = os.path.basename(real) + + # device-mapper targets (lvm, multipath, crypt, ...) carry a dm/uuid whose + # prefix names the target type. 
+ try: + with open(os.path.join(real, "dm", "uuid"), "r", encoding="utf-8") as f: + uuid = f.read().strip() + except OSError: + uuid = None + if uuid is not None: + prefix = uuid.split("-", 1)[0].lower() + backing = {"lvm": "lvm", "mpath": "mpath", "crypt": "crypt"}.get(prefix, "dm") + elif name.startswith("md"): + backing = "md" + elif name.startswith("nvme"): + backing = "nvme" + elif name.startswith(("vd", "xvd")): + backing = "virtio" + elif name.startswith(("sd", "hd", "sr")): + backing = "scsi" + elif name.startswith("loop"): + backing = "loop" + else: + backing = name or None + + return backing, name + + +def tri(value, unknown="?"): + """Render an Optional[bool] as a short cell.""" + if value is True: + return "yes" + if value is False: + return "no" + return unknown + + +def collect(mounts): + """Build a list of result rows for the block-backed mounts.""" + rows = [] + for m in mounts: + if m.fstype in PSEUDO_FSTYPES: + continue + if not m.source.startswith("/dev/") and ":" not in m.source: + continue + + backing, disk = backing_storage(m.devno) + supported_fs = fs_supported(m) + + # Only probe fs types that can qualify; unsupported fs is never capable. 
+ odirect = probe_odirect(m.mountpoint) if supported_fs else None + capable = odirect if supported_fs else False + + rows.append( + { + "mountpoint": m.mountpoint, + "fstype": fstype_label(m), + "device": disk or m.source, + "backing": backing, + "odirect": odirect, + "capable": capable, + } + ) + return rows + + +def print_volume_table(rows): + """Print the collected volume rows as an aligned table.""" + headers = ["MOUNTPOINT", "FSTYPE", "DEVICE", "BACKING", "O_DIRECT", "HIPFILE"] + table = [headers] + for r in rows: + table.append( + [ + r["mountpoint"], + r["fstype"], + r["device"], + r["backing"] or "?", + tri(r["odirect"], unknown="unverified"), + tri(r["capable"], unknown="unverified"), + ] + ) + + widths = [max(len(row[i]) for row in table) for i in range(len(headers))] + for row in table: + print(" ".join(cell.ljust(widths[i]) for i, cell in enumerate(row))) + + +def capable_volumes(): + """ + Enumerate block-backed mounts and score them for hipFile capability. + + Returns (rows, any_capable) where rows is the list of per-volume result + dicts and any_capable is True if at least one volume is confidently capable. + """ + rows = collect(parse_mountinfo()) + return rows, any(r["capable"] is True for r in rows) + + def main(): """ - Parse command-line arguments, check AIS support in kernel/libraries, - optionally print the results, and return an exit code indicating - whether all required components support AIS. + Parse command-line arguments, check AIS support in kernel/libraries and + mounted volumes, optionally print the results, and return an exit code + indicating whether all required components support AIS. 
""" parser = argparse.ArgumentParser() @@ -255,11 +528,13 @@ def main(): args = parser.parse_args() hip_libraries = hip_runtime_supports_ais() + volume_rows, volumes_ok = capable_volumes() component_support = [ ("Kernel P2PDMA support", kernel_supports_p2pdma()), ("HIP runtime", any(hip_libraries.values())), ("amdgpu", amdgpu_supports_ais()), + ("hipFile-capable volume", volumes_ok), ] if not args.quiet: @@ -277,6 +552,13 @@ def main(): pretty_supported = "NOT supported" print(f"\t{lib} (AIS {pretty_supported})") + print() + print("Mounted volumes:") + if volume_rows: + print_volume_table(volume_rows) + else: + print("No block-backed volumes found.") + print() print("AIS support in:") for name, supported in component_support: diff --git a/projects/hipfile/tools/ais-check/tests/test_main.py b/projects/hipfile/tools/ais-check/tests/test_main.py index fd2b160da69..1e0e7e8593b 100644 --- a/projects/hipfile/tools/ais-check/tests/test_main.py +++ b/projects/hipfile/tools/ais-check/tests/test_main.py @@ -22,14 +22,28 @@ def stub_checks(monkeypatch, ais_check): Returns a setter that fixes each component's support and the HIP library map. """ - def configure(*, p2pdma=True, amdgpu=True, hip_libraries=None): + def configure(*, p2pdma=True, amdgpu=True, hip_libraries=None, volumes=True): if hip_libraries is None: hip_libraries = {"/opt/rocm/lib/libamdhip64.so": True} + # capable_volumes() touches /proc/self/mountinfo and probes O_DIRECT, so + # stub it to keep main() hermetic. Return one row whose capability tracks + # the requested `volumes` flag. 
+ rows = [ + { + "mountpoint": "/data", + "fstype": "xfs", + "device": "nvme0n1", + "backing": "nvme", + "odirect": volumes, + "capable": volumes, + } + ] monkeypatch.setattr(ais_check, "kernel_supports_p2pdma", lambda: p2pdma) monkeypatch.setattr(ais_check, "amdgpu_supports_ais", lambda: amdgpu) monkeypatch.setattr( ais_check, "hip_runtime_supports_ais", lambda: dict(hip_libraries) ) + monkeypatch.setattr(ais_check, "capable_volumes", lambda: (rows, volumes)) monkeypatch.setattr( ais_check.os, "uname", @@ -56,6 +70,7 @@ def test_all_supported_exit_zero(monkeypatch, stub_checks, ais_check): {"amdgpu": False}, {"hip_libraries": {"/opt/rocm/lib/libamdhip64.so": False}}, {"hip_libraries": {}}, + {"volumes": False}, ], ) def test_any_missing_component_exit_nonzero( @@ -84,6 +99,10 @@ def test_default_output_lists_components(monkeypatch, capsys, stub_checks, ais_c assert "Kernel P2PDMA support" in out assert "HIP runtime" in out assert "amdgpu" in out + assert "hipFile-capable volume" in out + # The volume table is printed on every normal run. + assert "Mounted volumes:" in out + assert "MOUNTPOINT" in out # The uname banner is printed. assert "Linux host 6.6.0" in out diff --git a/projects/hipfile/tools/ais-check/tests/test_volumes.py b/projects/hipfile/tools/ais-check/tests/test_volumes.py new file mode 100644 index 00000000000..d69b13115a7 --- /dev/null +++ b/projects/hipfile/tools/ais-check/tests/test_volumes.py @@ -0,0 +1,86 @@ +# Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
+# +# SPDX-License-Identifier: MIT + +"""Tests for the volume-scan logic absorbed from ais-volumes.""" + +# pylint: disable=missing-function-docstring,redefined-outer-name,unused-argument + +import pytest + + +def _mount(ais_check, *, fstype, options="", source="/dev/nvme0n1", mountpoint="/data"): + return ais_check.Mount("259:0", mountpoint, fstype, source, options) + + +def test_fs_supported_xfs_always(ais_check): + assert ais_check.fs_supported(_mount(ais_check, fstype="xfs")) is True + + +@pytest.mark.parametrize( + "options,expected", + [ + ("rw", True), # no data= option -> ordered default + ("rw,data=ordered", True), + ("rw,data=writeback", False), + ("rw,data=journal", False), + ], +) +def test_fs_supported_ext4_data_mode(ais_check, options, expected): + assert ( + ais_check.fs_supported(_mount(ais_check, fstype="ext4", options=options)) + is expected + ) + + +def test_fs_supported_other_fs_rejected(ais_check): + assert ais_check.fs_supported(_mount(ais_check, fstype="btrfs")) is False + + +def test_fstype_label_folds_ext4_journal_mode(ais_check): + assert ais_check.fstype_label(_mount(ais_check, fstype="ext4")) == "ext4 (ordered)" + labelled = ais_check.fstype_label( + _mount(ais_check, fstype="ext4", options="rw,data=writeback") + ) + assert labelled == "ext4 (writeback)" + assert ais_check.fstype_label(_mount(ais_check, fstype="xfs")) == "xfs" + + +def test_collect_skips_pseudo_and_non_block(ais_check, monkeypatch): + monkeypatch.setattr(ais_check, "backing_storage", lambda devno: ("nvme", "nvme0n1")) + monkeypatch.setattr(ais_check, "probe_odirect", lambda mp: True) + + mounts = [ + _mount(ais_check, fstype="tmpfs", source="tmpfs", mountpoint="/run"), + _mount(ais_check, fstype="xfs", source="/dev/nvme0n1", mountpoint="/data"), + ] + rows = ais_check.collect(mounts) + + assert [r["mountpoint"] for r in rows] == ["/data"] + assert rows[0]["capable"] is True + + +def test_collect_unsupported_fs_not_probed(ais_check, monkeypatch): + 
monkeypatch.setattr(ais_check, "backing_storage", lambda devno: ("nvme", "nvme0n1")) + + def _fail_probe(mp): + raise AssertionError("probe_odirect must not run on unsupported fs") + + monkeypatch.setattr(ais_check, "probe_odirect", _fail_probe) + + rows = ais_check.collect([_mount(ais_check, fstype="btrfs")]) + + assert rows[0]["capable"] is False + assert rows[0]["odirect"] is None + + +def test_capable_volumes_aggregates_any(ais_check, monkeypatch): + monkeypatch.setattr(ais_check, "parse_mountinfo", lambda: []) + monkeypatch.setattr( + ais_check, + "collect", + lambda mounts: [{"capable": False}, {"capable": True}], + ) + rows, any_capable = ais_check.capable_volumes() + assert any_capable is True + assert len(rows) == 2 From 9ae2c313addbfc910d3515fa61eeb4b09c9dd80b Mon Sep 17 00:00:00 2001 From: akolliasAMD Date: Wed, 24 Jun 2026 14:00:39 -0600 Subject: [PATCH 2/3] pylint changes --- projects/hipfile/tools/ais-check/ais-check | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/hipfile/tools/ais-check/ais-check b/projects/hipfile/tools/ais-check/ais-check index 2f56ea81b15..796e3940fb9 100755 --- a/projects/hipfile/tools/ais-check/ais-check +++ b/projects/hipfile/tools/ais-check/ais-check @@ -39,7 +39,6 @@ import gzip import os import sys - # Mounts backed by these (or any non-block source) can never use the fast path. 
PSEUDO_FSTYPES = { "autofs", From a27b30900363ae9352d6d17dac78426dd090c1b8 Mon Sep 17 00:00:00 2001 From: akolliasAMD Date: Wed, 24 Jun 2026 14:21:40 -0600 Subject: [PATCH 3/3] copilot comments --- projects/hipfile/tools/ais-check/ais-check | 5 ++++- .../hipfile/tools/ais-check/tests/test_main.py | 2 +- .../hipfile/tools/ais-check/tests/test_volumes.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/projects/hipfile/tools/ais-check/ais-check b/projects/hipfile/tools/ais-check/ais-check index 796e3940fb9..a89367d5a41 100755 --- a/projects/hipfile/tools/ais-check/ais-check +++ b/projects/hipfile/tools/ais-check/ais-check @@ -451,7 +451,10 @@ def collect(mounts): for m in mounts: if m.fstype in PSEUDO_FSTYPES: continue - if not m.source.startswith("/dev/") and ":" not in m.source: + # Only consider local block devices. Network filesystems (NFS + # "server:/export", CIFS "//server/share", sshfs, ...) are not + # block-backed. + if not m.source.startswith("/dev/"): continue backing, disk = backing_storage(m.devno) diff --git a/projects/hipfile/tools/ais-check/tests/test_main.py b/projects/hipfile/tools/ais-check/tests/test_main.py index 1e0e7e8593b..f5403a6dc01 100644 --- a/projects/hipfile/tools/ais-check/tests/test_main.py +++ b/projects/hipfile/tools/ais-check/tests/test_main.py @@ -18,7 +18,7 @@ @pytest.fixture def stub_checks(monkeypatch, ais_check): """ - Stub the three component checks plus os.uname so main() runs hermetically. + Stub the four component checks plus os.uname so main() runs hermetically. Returns a setter that fixes each component's support and the HIP library map. 
""" diff --git a/projects/hipfile/tools/ais-check/tests/test_volumes.py b/projects/hipfile/tools/ais-check/tests/test_volumes.py index d69b13115a7..a428dd1830a 100644 --- a/projects/hipfile/tools/ais-check/tests/test_volumes.py +++ b/projects/hipfile/tools/ais-check/tests/test_volumes.py @@ -60,6 +60,21 @@ def test_collect_skips_pseudo_and_non_block(ais_check, monkeypatch): assert rows[0]["capable"] is True +def test_collect_skips_network_mount_before_resolution(ais_check, monkeypatch): + def _fail_backing(devno): + raise AssertionError("backing_storage must not run on a non-block mount") + + def _fail_probe(mp): + raise AssertionError("probe_odirect must not run on a non-block mount") + + monkeypatch.setattr(ais_check, "backing_storage", _fail_backing) + monkeypatch.setattr(ais_check, "probe_odirect", _fail_probe) + + # NFS export: virtual device (major 0) and a "server:/export" source. + nfs = ais_check.Mount("0:42", "/mnt/nfs", "nfs4", "server:/export", "rw") + assert ais_check.collect([nfs]) == [] + + def test_collect_unsupported_fs_not_probed(ais_check, monkeypatch): monkeypatch.setattr(ais_check, "backing_storage", lambda devno: ("nvme", "nvme0n1"))