From 10da91896802a7bdb72d141a2a1f4b5cf4ef2b5f Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 12:45:18 -0800 Subject: [PATCH 1/7] gaudi_gdr ucx plugin support in install nixl Signed-off-by: Daniel Huang --- install_nixl.py | 228 ++++++++++++++++++++++++++++++------------------ 1 file changed, 145 insertions(+), 83 deletions(-) diff --git a/install_nixl.py b/install_nixl.py index b8a04ac19..2f171e990 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -1,38 +1,26 @@ -# install_prerequisites.py +# install_nixl.py +import argparse +import glob import os import subprocess import sys -import argparse -import glob -import json -import urllib + # --- Configuration --- WHEELS_CACHE_HOME = os.environ.get("WHEELS_CACHE_HOME", "/tmp/wheels_cache") ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) -UCX_DIR = os.path.join('/tmp', 'ucx_source') -NIXL_DIR = os.path.join('/tmp', 'nixl_source') -UCX_INSTALL_DIR = os.path.join('/tmp', 'ucx_install') -UCX_REPO_URL = 'https://github.com/openucx/ucx.git' -NIXL_REPO_URL = 'https://github.com/ai-dynamo/nixl.git' - - -# --- Helper Functions --- -def get_latest_nixl_version(): - """Helper function to get latest release version of NIXL""" - try: - nixl_release_url = "https://api.github.com/repos/ai-dynamo/nixl/releases/latest" - with urllib.request.urlopen(nixl_release_url) as response: - data = json.load(response) - return data.get("tag_name", "0.7.0") - except Exception: - return "0.7.0" +UCX_DIR = os.path.join("/tmp", "ucx_source") +NIXL_DIR = os.path.join("/tmp", "nixl_source") +UCX_INSTALL_DIR = os.path.join("/tmp", "ucx_install") +UCX_REPO_URL = "https://github.com/openucx/ucx.git" +NIXL_REPO_URL = "https://github.com/ai-dynamo/nixl.git" +DEFAULT_UCX_COMMIT = "1df7b045d36c1e84f2fe9f251de83fb9103fc80e" +NIXL_VERSION = os.environ.get("NIXL_VERSION", "0.7.0") -NIXL_VERSION = os.environ.get("NIXL_VERSION", get_latest_nixl_version()) - -def run_command(command, cwd='.', env=None): +# --- Helper Functions --- +def run_command(command, cwd=".", env=None): """Helper function to run a shell command and check for errors.""" print(f"--> Running command: {' '.join(command)} in '{cwd}'", flush=True) subprocess.check_call(command, cwd=cwd, env=env) @@ -40,9 +28,11 @@ def run_command(command, cwd='.', env=None): def is_pip_package_installed(package_name): """Checks if a package is installed via pip without raising an exception.""" - result = subprocess.run([sys.executable, '-m', 'pip', 'show', package_name], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + result = subprocess.run( + [sys.executable, "-m", "pip", "show", package_name], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) return result.returncode == 0 @@ -62,10 +52,18 @@ def install_system_dependencies(): """Installs required system packages using apt-get if run as root.""" if os.geteuid() != 0: print("\n---", flush=True) - print("WARNING: Not running as root. Skipping system dependency installation.", flush=True) - print("Please ensure the following packages are installed on your system:", flush=True) - print(" patchelf build-essential git cmake ninja-build autotools-dev automake meson libtool libtool-bin", - flush=True) + print( + "WARNING: Not running as root. Skipping system dependency installation.", + flush=True, + ) + print( + "Please ensure the following packages are installed on your system:", + flush=True, + ) + print( + " patchelf build-essential git cmake ninja-build autotools-dev automake meson libtool libtool-bin", + flush=True, + ) print("---\n", flush=True) return @@ -80,10 +78,10 @@ def install_system_dependencies(): "automake", "meson", "libtool", - "libtool-bin" + "libtool-bin", ] - run_command(['apt-get', 'update']) - run_command(['apt-get', 'install', '-y'] + apt_packages) + run_command(["apt-get", "update"]) + run_command(["apt-get", "install", "-y"] + apt_packages) print("--- System dependencies installed successfully. ---\n", flush=True) @@ -91,22 +89,28 @@ def build_and_install_prerequisites(args): """Builds UCX and NIXL from source, creating a self-contained wheel.""" # ... (initial checks and setup are unchanged) ... - if not args.force_reinstall and is_pip_package_installed('nixl'): + if not args.force_reinstall and is_pip_package_installed("nixl"): print("--> NIXL is already installed. Nothing to do.", flush=True) return cached_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) if not args.force_reinstall and cached_wheel: - print(f"\n--> Found self-contained wheel: {os.path.basename(cached_wheel)}.", flush=True) + print( + f"\n--> Found self-contained wheel: {os.path.basename(cached_wheel)}.", + flush=True, + ) print("--> Installing from cache, skipping all source builds.", flush=True) - install_command = [sys.executable, '-m', 'pip', 'install', cached_wheel] + install_command = [sys.executable, "-m", "pip", "install", cached_wheel] run_command(install_command) print("\n--- Installation from cache complete. ---", flush=True) return - print("\n--> No installed package or cached wheel found. Starting full build process...", flush=True) + print( + "\n--> No installed package or cached wheel found. Starting full build process...", + flush=True, + ) print("\n--> Installing auditwheel...", flush=True) - run_command([sys.executable, '-m', 'pip', 'install', 'auditwheel']) + run_command([sys.executable, "-m", "pip", "install", "auditwheel"]) install_system_dependencies() ucx_install_path = os.path.abspath(UCX_INSTALL_DIR) print(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}", flush=True) @@ -114,53 +118,72 @@ def build_and_install_prerequisites(args): # -- Step 1: Build UCX from source -- # ... (UCX build process is unchanged) ... - print("\n[1/3] Configuring and building UCX from source...", flush=True) + print("\n[1/4] Configuring and building UCX from source...", flush=True) if not os.path.exists(UCX_DIR): - run_command(['git', 'clone', UCX_REPO_URL, UCX_DIR]) + run_command(["git", "clone", UCX_REPO_URL, UCX_DIR]) ucx_source_path = os.path.abspath(UCX_DIR) - run_command(['git', 'checkout', 'v1.19.x'], cwd=ucx_source_path) - run_command(['./autogen.sh'], cwd=ucx_source_path) + run_command(["git", "checkout", args.ucx_commit], cwd=ucx_source_path) + run_command(["./autogen.sh"], cwd=ucx_source_path) configure_command = [ - './configure', - f'--prefix={ucx_install_path}', - '--enable-shared', - '--disable-static', - '--disable-doxygen-doc', - '--enable-optimizations', - '--enable-cma', - '--enable-devel-headers', - '--with-verbs', - '--enable-mt', + "./configure", + f"--prefix={ucx_install_path}", + "--enable-shared", + "--disable-static", + "--disable-doxygen-doc", + "--enable-optimizations", + "--enable-cma", + "--enable-devel-headers", + "--with-verbs", + "--enable-mt", + "--with-gaudi=yes", + "--with-mlx5=no", + "--enable-examples", ] run_command(configure_command, cwd=ucx_source_path) - run_command(['make', '-j', str(os.cpu_count() or 1)], cwd=ucx_source_path) - run_command(['make', 'install'], cwd=ucx_source_path) + run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path) + run_command(["make", "install-strip"], cwd=ucx_source_path) print("--- UCX build and install complete ---", flush=True) # -- Step 2: Build NIXL wheel from source -- - print("\n[2/3] Building NIXL wheel from source...", flush=True) + print("\n[2/4] Building NIXL wheel from source...", flush=True) if not os.path.exists(NIXL_DIR): - run_command(['git', 'clone', NIXL_REPO_URL, NIXL_DIR]) + run_command(["git", "clone", NIXL_REPO_URL, NIXL_DIR]) else: run_command(["git", "fetch", "--tags"], cwd=NIXL_DIR) run_command(["git", "checkout", NIXL_VERSION], cwd=NIXL_DIR) print(f"--> Checked out NIXL version: {NIXL_VERSION}", flush=True) build_env = os.environ.copy() - build_env['PKG_CONFIG_PATH'] = os.path.join(ucx_install_path, 'lib', 'pkgconfig') - ucx_lib_path = os.path.join(ucx_install_path, 'lib') - ucx_plugin_path = os.path.join(ucx_lib_path, 'ucx') - existing_ld_path = os.environ.get('LD_LIBRARY_PATH', '') - build_env['LD_LIBRARY_PATH'] = f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(':') + build_env["PKG_CONFIG_PATH"] = os.path.join(ucx_install_path, "lib", "pkgconfig") + ucx_lib_path = os.path.join(ucx_install_path, "lib") + ucx_plugin_path = os.path.join(ucx_lib_path, "ucx") + existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "") + build_env["LD_LIBRARY_PATH"] = ( + f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":") + ) print(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}", flush=True) - temp_wheel_dir = os.path.join(ROOT_DIR, 'temp_wheelhouse') - run_command([sys.executable, '-m', 'pip', 'wheel', '.', '--no-deps', f'--wheel-dir={temp_wheel_dir}'], - cwd=os.path.abspath(NIXL_DIR), - env=build_env) + temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse") + run_command( + [ + sys.executable, + "-m", + "pip", + "wheel", + ".", + "--no-deps", + f"--wheel-dir={temp_wheel_dir}", + "-C", + f"setup-args=-Ducx_path={ucx_install_path}", + "-C", + "setup-args=-Ddisable_gds_backend=false", + ], + cwd=os.path.abspath(NIXL_DIR), + env=build_env, + ) # -- Step 3: Repair the wheel, excluding the already-bundled plugin -- - print("\n[3/3] Repairing NIXL wheel to include UCX libraries...", flush=True) + print("\n[3/4] Repairing NIXL wheel to include UCX libraries...", flush=True) unrepaired_wheel = find_nixl_wheel_in_cache(temp_wheel_dir) if not unrepaired_wheel: raise RuntimeError("Failed to find the NIXL wheel after building it.") @@ -168,27 +191,57 @@ def build_and_install_prerequisites(args): # --- 👇 THE CORRECTED COMMAND 👇 --- # We tell auditwheel to ignore the plugin that mesonpy already handled. auditwheel_command = [ - 'auditwheel', - 'repair', - '--exclude', - 'libplugin_UCX.so', # <-- Exclude the problematic library + "auditwheel", + "repair", + "--exclude", + "libplugin_UCX.so", + "--exclude", + "libplugin_UCX_MO.so", unrepaired_wheel, - f'--wheel-dir={WHEELS_CACHE_HOME}' + f"--wheel-dir={WHEELS_CACHE_HOME}", ] run_command(auditwheel_command, env=build_env) # --- 👆 END CORRECTION 👆 --- - # --- CLEANUP --- + # -- Step 4: Bundle UCX plugins into the repaired wheel -- + print("\n[4/4] Bundling UCX plugins into the wheel...", flush=True) + repaired_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) + + ucx_plugins_src = os.path.join(ucx_install_path, "lib", "ucx") + helper_script = os.path.join(NIXL_DIR, "contrib", "wheel_add_ucx_plugins.py") + + if os.path.exists(helper_script) and os.path.exists(ucx_plugins_src): + # Patch the helper script to skip NIXL plugins (since we only want UCX) + # This prevents it from failing when it can't find system NIXL plugins + sed_expr = 's/add_plugins(wheel_path, args.nixl_plugins_dir, "nixl")/# &/' + run_command(["sed", "-i", sed_expr, helper_script]) + + print(f"--> Adding plugins from {ucx_plugins_src}", flush=True) + bundle_cmd = [ + sys.executable, + helper_script, + "--ucx-plugins-dir", + ucx_plugins_src, + repaired_wheel, + ] + run_command(bundle_cmd, env=build_env) + else: + print( + f"--> Warning: Helper script or UCX plugins not found. Skipping bundling.", + flush=True, + ) + # No more temporary files to remove, just the temp wheelhouse - run_command(['rm', '-rf', temp_wheel_dir]) - # --- END CLEANUP --- + run_command(["rm", "-rf", temp_wheel_dir]) newly_built_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) if not newly_built_wheel: raise RuntimeError("Failed to find the repaired NIXL wheel.") - print(f"--> Successfully built self-contained wheel: {os.path.basename(newly_built_wheel)}. Now installing...", - flush=True) + print( + f"--> Successfully built self-contained wheel: {os.path.basename(newly_built_wheel)}. Now installing...", + flush=True, + ) install_command = [ sys.executable, "-m", @@ -198,16 +251,25 @@ def build_and_install_prerequisites(args): newly_built_wheel, ] if args.force_reinstall: - install_command.insert(-1, '--force-reinstall') + install_command.insert(-1, "--force-reinstall") run_command(install_command) print("--- NIXL installation complete ---", flush=True) if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Build and install UCX and NIXL dependencies.") - parser.add_argument('--force-reinstall', - action='store_true', - help='Force rebuild and reinstall of UCX and NIXL even if they are already installed.') + parser = argparse.ArgumentParser( + description="Build and install UCX and NIXL dependencies." + ) + parser.add_argument( + "--force-reinstall", + action="store_true", + help="Force rebuild and reinstall of UCX and NIXL even if they are already installed.", + ) + parser.add_argument( + "--ucx-commit", + default=DEFAULT_UCX_COMMIT, + help=f"UCX commit to build (default: {DEFAULT_UCX_COMMIT})", + ) args = parser.parse_args() build_and_install_prerequisites(args) From d9576e1b4ac4122b9f24a6aa96193c513932198a Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 12:54:52 -0800 Subject: [PATCH 2/7] Add comment on ucx commit Signed-off-by: Daniel Huang --- install_nixl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/install_nixl.py b/install_nixl.py index 2f171e990..6462b5d82 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -15,6 +15,7 @@ UCX_REPO_URL = "https://github.com/openucx/ucx.git" NIXL_REPO_URL = "https://github.com/ai-dynamo/nixl.git" +# Latest good commit with gaudi_gdr support DEFAULT_UCX_COMMIT = "1df7b045d36c1e84f2fe9f251de83fb9103fc80e" NIXL_VERSION = os.environ.get("NIXL_VERSION", "0.7.0") From 63532e3ba1b6229a908c561c221191f915708705 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 13:23:46 -0800 Subject: [PATCH 3/7] Bring in functions directly for more robust plugin bundling Signed-off-by: Daniel Huang --- install_nixl.py | 236 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 212 insertions(+), 24 deletions(-) diff --git a/install_nixl.py b/install_nixl.py index 6462b5d82..1bf8a20ae 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -4,6 +4,13 @@ import os import subprocess import sys +import base64 +import csv +import hashlib +import logging +import shutil +import tempfile +import zipfile # --- Configuration --- @@ -19,6 +26,9 @@ DEFAULT_UCX_COMMIT = "1df7b045d36c1e84f2fe9f251de83fb9103fc80e" NIXL_VERSION = os.environ.get("NIXL_VERSION", "0.7.0") +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # --- Helper Functions --- def run_command(command, cwd=".", env=None): @@ -86,6 +96,202 @@ def install_system_dependencies(): print("--- System dependencies installed successfully. ---\n", flush=True) +# --- Wheel Manipulation Helpers --- +def extract_wheel(wheel_path): + """ + Extract the wheel to a temporary directory. + Returns: + Path to the temporary directory. The caller is responsible for cleaning up the directory. + """ + temp_dir = tempfile.mkdtemp() + logger.info("Extracting wheel %s to %s", wheel_path, temp_dir) + with zipfile.ZipFile(wheel_path, "r") as zip_ref: + zip_ref.extractall(temp_dir) + return temp_dir + + +def update_wheel_record_file(temp_dir): + """ + Update the RECORD file in the wheel to include the hashes and sizes of all files. + """ + dist_info_dir = None + for entry in os.listdir(temp_dir): + if entry.endswith(".dist-info"): + dist_info_dir = entry + break + if dist_info_dir is None: + raise RuntimeError("No .dist-info directory found in wheel") + + record_path = os.path.join(temp_dir, dist_info_dir, "RECORD") + + def hash_and_size(file_path): + h = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + h.update(chunk) + digest = base64.urlsafe_b64encode(h.digest()).rstrip(b"=").decode("ascii") + size = os.path.getsize(file_path) + return f"sha256={digest}", str(size) + + entries = [] + for root, _, files in os.walk(temp_dir): + for filename in files: + full_path = os.path.join(root, filename) + rel_path = os.path.relpath(full_path, temp_dir).replace(os.sep, "/") + if rel_path == f"{dist_info_dir}/RECORD": + # RECORD file itself: no hash or size + entries.append((rel_path, "", "")) + else: + file_hash, file_size = hash_and_size(full_path) + entries.append((rel_path, file_hash, file_size)) + + with open(record_path, "w", newline="") as rec_file: + writer = csv.writer(rec_file) + writer.writerows(entries) + + +def create_wheel(wheel_path, temp_dir): + """ + Create a wheel from a temporary directory. + """ + logger.info("Creating wheel %s from %s", wheel_path, temp_dir) + update_wheel_record_file(temp_dir) + with zipfile.ZipFile(wheel_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zip_ref: + for root, _, files in os.walk(temp_dir): + for file in files: + abs_path = os.path.join(root, file) + rel_path = os.path.relpath(abs_path, start=temp_dir) + zip_ref.write(abs_path, arcname=rel_path) + + +def get_repaired_lib_name_map(libs_dir): + """ + auditwheel repair renames all libs to include a hash of the library name + e.g. "nixl.libs/libboost_atomic-fb1368c6.so.1.66.0" + Extract mapping from base name (like "libboost_atomic") to full file name + (like "libboost_atomic-fb1368c6.so.1.66.0"). + """ + name_map = {} + for fname in sorted(os.listdir(libs_dir)): + if os.path.isfile(os.path.join(libs_dir, fname)) and ".so" in fname and "-" in fname: + base_name = fname.split("-")[0] + name_map[base_name] = fname + print(f"Found already bundled lib: {base_name} -> {fname}") + return name_map + + +def get_lib_deps(lib_path): + """ + Get the dependencies of a library, as a map from library name to path. + """ + deps = os.popen(f"ldd {lib_path}").read().strip().split("\n") + ret = {} + for dep in deps: + if "=>" in dep: + left, right = dep.split("=>", 1) + dep_name = left.strip() + right = right.strip() + if right == "not found": + ret[dep_name] = None + else: + dep_path = right.split(" ")[0].strip() + ret[dep_name] = dep_path + return ret + + +def copytree(src, dst): + """ + Copy a tree of files from @src directory to @dst directory. + Similar to shutil.copytree, but returns a list of all files copied. + """ + copied_files = [] + for root, dirs, files in os.walk(src): + rel_path = os.path.relpath(root, src) + dst_dir = os.path.join(dst, rel_path) + os.makedirs(dst_dir, exist_ok=True) + for file in files: + src_file = os.path.join(root, file) + dst_file = os.path.join(dst_dir, file) + shutil.copy2(src_file, dst_file) + copied_files.append(dst_file) + return copied_files + + +def add_plugins(wheel_path, sys_plugins_dir, install_dirname): + """ + Adds the plugins from @sys_dir to the wheel. + The plugins are copied to a subdirectory @install_dir relative to the wheel's nixl.libs. + The plugins are patched to load their dependencies from the wheel. + The wheel file is then recreated. + """ + temp_dir = extract_wheel(wheel_path) + + pkg_name = wheel_path.split("/")[-1].split("-")[0] + pkg_libs_dir = os.path.join(temp_dir, f"{pkg_name}.libs") + if not os.path.exists(pkg_libs_dir): + raise FileNotFoundError(f"{pkg_name}.libs directory not found in wheel: {wheel_path}") + + logger.debug("Listing existing libs:") + name_map = get_repaired_lib_name_map(pkg_libs_dir) + + # Ensure that all of them in name_map have RPATH set to $ORIGIN + for fname in name_map.values(): + fpath = os.path.join(pkg_libs_dir, fname) + rpath = os.popen(f"patchelf --print-rpath {fpath}").read().strip() + if "$ORIGIN" in rpath.split(":"): + continue + if not rpath: + rpath = "$ORIGIN" + else: + rpath = "$ORIGIN:" + rpath + logger.debug("Setting rpath for %s to %s", fpath, rpath) + ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") + if ret != 0: + raise RuntimeError(f"Failed to set rpath for {fpath}") + + pkg_plugins_dir = os.path.join(pkg_libs_dir, install_dirname) + logger.debug("Copying plugins from %s to %s", sys_plugins_dir, pkg_plugins_dir) + copied_files = copytree(sys_plugins_dir, pkg_plugins_dir) + if not copied_files: + raise RuntimeError(f"No plugins found in {sys_plugins_dir}") + + # Patch all libs to load plugin deps from the wheel + for fname in copied_files: + logger.debug("Patching %s", fname) + fpath = os.path.join(pkg_plugins_dir, fname) + if os.path.isfile(fpath) and ".so" in fname: + rpath = os.popen(f"patchelf --print-rpath {fpath}").read().strip() + if not rpath: + rpath = "$ORIGIN/..:$ORIGIN" + else: + rpath = "$ORIGIN/..:$ORIGIN:" + rpath + logger.debug("Setting rpath for %s to %s", fpath, rpath) + ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") + if ret != 0: + raise RuntimeError(f"Failed to set rpath for {fpath}") + # Replace the original libs with the patched one + for libname, _ in get_lib_deps(fpath).items(): + # "libuct.so.0" -> "libuct" + base_name = libname.split(".")[0] + if base_name in name_map: + packaged_name = name_map[base_name] + logger.debug("Replacing %s with %s in %s", libname, packaged_name, fpath) + ret = os.system(f"patchelf --replace-needed {libname} {packaged_name} {fpath}") + if ret != 0: + raise RuntimeError(f"Failed to replace {libname} with {packaged_name} in {fpath}") + # Check that there is no breakage introduced in the patched lib + logger.debug("Checking that %s loads", fpath) + original_deps = get_lib_deps(os.path.join(sys_plugins_dir, fname)) + for libname, libpath in get_lib_deps(fpath).items(): + if libpath is None: + if libname not in original_deps or original_deps[libname] is not None: + raise RuntimeError(f"Library {libname} not loaded by {fpath}") + + create_wheel(wheel_path, temp_dir) + shutil.rmtree(temp_dir) + logger.info("Added plugins to wheel: %s", wheel_path) + + def build_and_install_prerequisites(args): """Builds UCX and NIXL from source, creating a self-contained wheel.""" @@ -159,9 +365,7 @@ def build_and_install_prerequisites(args): ucx_lib_path = os.path.join(ucx_install_path, "lib") ucx_plugin_path = os.path.join(ucx_lib_path, "ucx") existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "") - build_env["LD_LIBRARY_PATH"] = ( - f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":") - ) + build_env["LD_LIBRARY_PATH"] = f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":") print(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}", flush=True) temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse") @@ -189,7 +393,6 @@ def build_and_install_prerequisites(args): if not unrepaired_wheel: raise RuntimeError("Failed to find the NIXL wheel after building it.") - # --- 👇 THE CORRECTED COMMAND 👇 --- # We tell auditwheel to ignore the plugin that mesonpy already handled. auditwheel_command = [ "auditwheel", @@ -202,33 +405,20 @@ def build_and_install_prerequisites(args): f"--wheel-dir={WHEELS_CACHE_HOME}", ] run_command(auditwheel_command, env=build_env) - # --- 👆 END CORRECTION 👆 --- # -- Step 4: Bundle UCX plugins into the repaired wheel -- print("\n[4/4] Bundling UCX plugins into the wheel...", flush=True) repaired_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) ucx_plugins_src = os.path.join(ucx_install_path, "lib", "ucx") - helper_script = os.path.join(NIXL_DIR, "contrib", "wheel_add_ucx_plugins.py") - - if os.path.exists(helper_script) and os.path.exists(ucx_plugins_src): - # Patch the helper script to skip NIXL plugins (since we only want UCX) - # This prevents it from failing when it can't find system NIXL plugins - sed_expr = 's/add_plugins(wheel_path, args.nixl_plugins_dir, "nixl")/# &/' - run_command(["sed", "-i", sed_expr, helper_script]) + if os.path.exists(ucx_plugins_src): print(f"--> Adding plugins from {ucx_plugins_src}", flush=True) - bundle_cmd = [ - sys.executable, - helper_script, - "--ucx-plugins-dir", - ucx_plugins_src, - repaired_wheel, - ] - run_command(bundle_cmd, env=build_env) + # Direct call to the ported function + add_plugins(repaired_wheel, ucx_plugins_src, "ucx") else: print( - f"--> Warning: Helper script or UCX plugins not found. Skipping bundling.", + f"--> Warning: UCX plugins not found. Skipping bundling.", flush=True, ) @@ -259,9 +449,7 @@ def build_and_install_prerequisites(args): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Build and install UCX and NIXL dependencies." - ) + parser = argparse.ArgumentParser(description="Build and install UCX and NIXL dependencies.") parser.add_argument( "--force-reinstall", action="store_true", From 82bcb62f2ccf78871e55f334bcbce8bb76b20061 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 13:32:01 -0800 Subject: [PATCH 4/7] Logging and cleanup Signed-off-by: Daniel Huang --- install_nixl.py | 108 ++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 63 deletions(-) diff --git a/install_nixl.py b/install_nixl.py index 1bf8a20ae..3a9bc9bb9 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -1,18 +1,17 @@ # install_nixl.py import argparse -import glob -import os -import subprocess -import sys import base64 import csv +import glob import hashlib import logging +import os import shutil +import subprocess +import sys import tempfile import zipfile - # --- Configuration --- WHEELS_CACHE_HOME = os.environ.get("WHEELS_CACHE_HOME", "/tmp/wheels_cache") ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -26,14 +25,15 @@ DEFAULT_UCX_COMMIT = "1df7b045d36c1e84f2fe9f251de83fb9103fc80e" NIXL_VERSION = os.environ.get("NIXL_VERSION", "0.7.0") -logging.basicConfig(level=logging.INFO) +# Configure logging to output to stdout, matching the previous print behavior +logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stdout) logger = logging.getLogger(__name__) # --- Helper Functions --- def run_command(command, cwd=".", env=None): """Helper function to run a shell command and check for errors.""" - print(f"--> Running command: {' '.join(command)} in '{cwd}'", flush=True) + logger.info(f"--> Running command: {' '.join(command)} in '{cwd}'") subprocess.check_call(command, cwd=cwd, env=env) @@ -62,25 +62,18 @@ def find_nixl_wheel_in_cache(cache_dir): def install_system_dependencies(): """Installs required system packages using apt-get if run as root.""" if os.geteuid() != 0: - print("\n---", flush=True) - print( - "WARNING: Not running as root. Skipping system dependency installation.", - flush=True, - ) - print( - "Please ensure the following packages are installed on your system:", - flush=True, - ) - print( - " patchelf build-essential git cmake ninja-build autotools-dev automake meson libtool libtool-bin", - flush=True, + logger.warning( + "\n---\n" + "WARNING: Not running as root. Skipping system dependency installation.\n" + "Please ensure the following packages are installed on your system:\n" + " patchelf build-essential git cmake ninja-build autotools-dev automake meson libtool libtool-bin\n" + "---\n" ) - print("---\n", flush=True) return - print("--- Running as root. Installing system dependencies... ---", flush=True) + logger.info("--- Running as root. Installing system dependencies... ---") apt_packages = [ - "patchelf", # <-- Add patchelf here + "patchelf", "build-essential", "git", "cmake", @@ -93,7 +86,7 @@ def install_system_dependencies(): ] run_command(["apt-get", "update"]) run_command(["apt-get", "install", "-y"] + apt_packages) - print("--- System dependencies installed successfully. ---\n", flush=True) + logger.info("--- System dependencies installed successfully. ---\n") # --- Wheel Manipulation Helpers --- @@ -104,7 +97,7 @@ def extract_wheel(wheel_path): Path to the temporary directory. The caller is responsible for cleaning up the directory. """ temp_dir = tempfile.mkdtemp() - logger.info("Extracting wheel %s to %s", wheel_path, temp_dir) + logger.info(f"Extracting wheel {wheel_path} to {temp_dir}") with zipfile.ZipFile(wheel_path, "r") as zip_ref: zip_ref.extractall(temp_dir) return temp_dir @@ -154,7 +147,7 @@ def create_wheel(wheel_path, temp_dir): """ Create a wheel from a temporary directory. """ - logger.info("Creating wheel %s from %s", wheel_path, temp_dir) + logger.info(f"Creating wheel {wheel_path} from {temp_dir}") update_wheel_record_file(temp_dir) with zipfile.ZipFile(wheel_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zip_ref: for root, _, files in os.walk(temp_dir): @@ -176,7 +169,7 @@ def get_repaired_lib_name_map(libs_dir): if os.path.isfile(os.path.join(libs_dir, fname)) and ".so" in fname and "-" in fname: base_name = fname.split("-")[0] name_map[base_name] = fname - print(f"Found already bundled lib: {base_name} -> {fname}") + logger.info(f"Found already bundled lib: {base_name} -> {fname}") return name_map @@ -244,20 +237,20 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): rpath = "$ORIGIN" else: rpath = "$ORIGIN:" + rpath - logger.debug("Setting rpath for %s to %s", fpath, rpath) + logger.debug(f"Setting rpath for {fpath} to {rpath}") ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") if ret != 0: raise RuntimeError(f"Failed to set rpath for {fpath}") pkg_plugins_dir = os.path.join(pkg_libs_dir, install_dirname) - logger.debug("Copying plugins from %s to %s", sys_plugins_dir, pkg_plugins_dir) + logger.debug(f"Copying plugins from {sys_plugins_dir} to {pkg_plugins_dir}") copied_files = copytree(sys_plugins_dir, pkg_plugins_dir) if not copied_files: raise RuntimeError(f"No plugins found in {sys_plugins_dir}") # Patch all libs to load plugin deps from the wheel for fname in copied_files: - logger.debug("Patching %s", fname) + logger.debug(f"Patching {fname}") fpath = os.path.join(pkg_plugins_dir, fname) if os.path.isfile(fpath) and ".so" in fname: rpath = os.popen(f"patchelf --print-rpath {fpath}").read().strip() @@ -265,7 +258,7 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): rpath = "$ORIGIN/..:$ORIGIN" else: rpath = "$ORIGIN/..:$ORIGIN:" + rpath - logger.debug("Setting rpath for %s to %s", fpath, rpath) + logger.debug(f"Setting rpath for {fpath} to {rpath}") ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") if ret != 0: raise RuntimeError(f"Failed to set rpath for {fpath}") @@ -275,12 +268,12 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): base_name = libname.split(".")[0] if base_name in name_map: packaged_name = name_map[base_name] - logger.debug("Replacing %s with %s in %s", libname, packaged_name, fpath) + logger.debug(f"Replacing {libname} with {packaged_name} in {fpath}") ret = os.system(f"patchelf --replace-needed {libname} {packaged_name} {fpath}") if ret != 0: raise RuntimeError(f"Failed to replace {libname} with {packaged_name} in {fpath}") # Check that there is no breakage introduced in the patched lib - logger.debug("Checking that %s loads", fpath) + logger.debug(f"Checking that {fpath} loads") original_deps = get_lib_deps(os.path.join(sys_plugins_dir, fname)) for libname, libpath in get_lib_deps(fpath).items(): if libpath is None: @@ -289,7 +282,7 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): create_wheel(wheel_path, temp_dir) shutil.rmtree(temp_dir) - logger.info("Added plugins to wheel: %s", wheel_path) + logger.info(f"Added plugins to wheel: {wheel_path}") def build_and_install_prerequisites(args): @@ -297,35 +290,28 @@ def build_and_install_prerequisites(args): # ... (initial checks and setup are unchanged) ... if not args.force_reinstall and is_pip_package_installed("nixl"): - print("--> NIXL is already installed. Nothing to do.", flush=True) + logger.info("--> NIXL is already installed. Nothing to do.") return cached_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) if not args.force_reinstall and cached_wheel: - print( - f"\n--> Found self-contained wheel: {os.path.basename(cached_wheel)}.", - flush=True, - ) - print("--> Installing from cache, skipping all source builds.", flush=True) + logger.info(f"\n--> Found self-contained wheel: {os.path.basename(cached_wheel)}.") + logger.info("--> Installing from cache, skipping all source builds.") install_command = [sys.executable, "-m", "pip", "install", cached_wheel] run_command(install_command) - print("\n--- Installation from cache complete. ---", flush=True) + logger.info("\n--- Installation from cache complete. ---") return - print( - "\n--> No installed package or cached wheel found. Starting full build process...", - flush=True, - ) - print("\n--> Installing auditwheel...", flush=True) + logger.info("\n--> No installed package or cached wheel found. Starting full build process...") + logger.info("\n--> Installing auditwheel...") run_command([sys.executable, "-m", "pip", "install", "auditwheel"]) install_system_dependencies() ucx_install_path = os.path.abspath(UCX_INSTALL_DIR) - print(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}", flush=True) + logger.info(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}") os.makedirs(WHEELS_CACHE_HOME, exist_ok=True) # -- Step 1: Build UCX from source -- - # ... (UCX build process is unchanged) ... - print("\n[1/4] Configuring and building UCX from source...", flush=True) + logger.info("\n[1/4] Configuring and building UCX from source...") if not os.path.exists(UCX_DIR): run_command(["git", "clone", UCX_REPO_URL, UCX_DIR]) ucx_source_path = os.path.abspath(UCX_DIR) @@ -349,16 +335,16 @@ def build_and_install_prerequisites(args): run_command(configure_command, cwd=ucx_source_path) run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path) run_command(["make", "install-strip"], cwd=ucx_source_path) - print("--- UCX build and install complete ---", flush=True) + logger.info("--- UCX build and install complete ---") # -- Step 2: Build NIXL wheel from source -- - print("\n[2/4] Building NIXL wheel from source...", flush=True) + logger.info("\n[2/4] Building NIXL wheel from source...") if not os.path.exists(NIXL_DIR): run_command(["git", "clone", NIXL_REPO_URL, NIXL_DIR]) else: run_command(["git", "fetch", "--tags"], cwd=NIXL_DIR) run_command(["git", "checkout", NIXL_VERSION], cwd=NIXL_DIR) - print(f"--> Checked out NIXL version: {NIXL_VERSION}", flush=True) + logger.info(f"--> Checked out NIXL version: {NIXL_VERSION}") build_env = os.environ.copy() build_env["PKG_CONFIG_PATH"] = os.path.join(ucx_install_path, "lib", "pkgconfig") @@ -366,7 +352,7 @@ def build_and_install_prerequisites(args): ucx_plugin_path = os.path.join(ucx_lib_path, "ucx") existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "") build_env["LD_LIBRARY_PATH"] = f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":") - print(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}", flush=True) + logger.info(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}") temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse") run_command( @@ -388,7 +374,7 @@ def build_and_install_prerequisites(args): ) # -- Step 3: Repair the wheel, excluding the already-bundled plugin -- - print("\n[3/4] Repairing NIXL wheel to include UCX libraries...", flush=True) + logger.info("\n[3/4] Repairing NIXL wheel to include UCX libraries...") unrepaired_wheel = find_nixl_wheel_in_cache(temp_wheel_dir) if not unrepaired_wheel: raise RuntimeError("Failed to find the NIXL wheel after building it.") @@ -407,20 +393,17 @@ def build_and_install_prerequisites(args): run_command(auditwheel_command, env=build_env) # -- Step 4: Bundle UCX plugins into the repaired wheel -- - print("\n[4/4] Bundling UCX plugins into the wheel...", flush=True) + logger.info("\n[4/4] Bundling UCX plugins into the wheel...") repaired_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) ucx_plugins_src = os.path.join(ucx_install_path, "lib", "ucx") if os.path.exists(ucx_plugins_src): - print(f"--> Adding plugins from {ucx_plugins_src}", flush=True) + logger.info(f"--> Adding plugins from {ucx_plugins_src}") # Direct call to the ported function add_plugins(repaired_wheel, ucx_plugins_src, "ucx") else: - print( - f"--> Warning: UCX plugins not found. Skipping bundling.", - flush=True, - ) + logger.info(f"--> Warning: UCX plugins not found. Skipping bundling.") # No more temporary files to remove, just the temp wheelhouse run_command(["rm", "-rf", temp_wheel_dir]) @@ -429,9 +412,8 @@ def build_and_install_prerequisites(args): if not newly_built_wheel: raise RuntimeError("Failed to find the repaired NIXL wheel.") - print( - f"--> Successfully built self-contained wheel: {os.path.basename(newly_built_wheel)}. Now installing...", - flush=True, + logger.info( + f"--> Successfully built self-contained wheel: {os.path.basename(newly_built_wheel)}. Now installing..." ) install_command = [ sys.executable, @@ -445,7 +427,7 @@ def build_and_install_prerequisites(args): install_command.insert(-1, "--force-reinstall") run_command(install_command) - print("--- NIXL installation complete ---", flush=True) + logger.info("--- NIXL installation complete ---") if __name__ == "__main__": From 18d0ffd547548f4664a822e5840638f28c380790 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 13:51:48 -0800 Subject: [PATCH 5/7] Update all system run commands to run_command Signed-off-by: Daniel Huang --- install_nixl.py | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/install_nixl.py b/install_nixl.py index 3a9bc9bb9..05e3ee018 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -31,10 +31,17 @@ # --- Helper Functions --- -def run_command(command, cwd=".", env=None): - """Helper function to run a shell command and check for errors.""" +def run_command(command, cwd=".", env=None, **kwargs): + """Helper function to run a shell command and check for errors. + + Returns: + subprocess.CompletedProcess: The result object containing returncode, stdout, etc. + """ logger.info(f"--> Running command: {' '.join(command)} in '{cwd}'") - subprocess.check_call(command, cwd=cwd, env=env) + # Default to check=True to raise exception on error, matching old check_call behavior + if "check" not in kwargs: + kwargs["check"] = True + return subprocess.run(command, cwd=cwd, env=env, **kwargs) def is_pip_package_installed(package_name): @@ -177,7 +184,8 @@ def get_lib_deps(lib_path): """ Get the dependencies of a library, as a map from library name to path. """ - deps = os.popen(f"ldd {lib_path}").read().strip().split("\n") + # Replaced os.popen with subprocess.check_output + deps = run_command(["ldd", lib_path], capture_output=True, text=True).stdout.strip().split("\n") ret = {} for dep in deps: if "=>" in dep: @@ -230,7 +238,8 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): # Ensure that all of them in name_map have RPATH set to $ORIGIN for fname in name_map.values(): fpath = os.path.join(pkg_libs_dir, fname) - rpath = os.popen(f"patchelf --print-rpath {fpath}").read().strip() + res = run_command(["patchelf", "--print-rpath", fpath], capture_output=True, text=True, check=False) + rpath = res.stdout.strip() if res.returncode == 0 else "" if "$ORIGIN" in rpath.split(":"): continue if not rpath: @@ -238,9 +247,7 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): else: rpath = "$ORIGIN:" + rpath logger.debug(f"Setting rpath for {fpath} to {rpath}") - ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") - if ret != 0: - raise RuntimeError(f"Failed to set rpath for {fpath}") + run_command(["patchelf", "--set-rpath", rpath, fpath]) pkg_plugins_dir = os.path.join(pkg_libs_dir, install_dirname) logger.debug(f"Copying plugins from {sys_plugins_dir} to {pkg_plugins_dir}") @@ -253,15 +260,15 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): logger.debug(f"Patching {fname}") fpath = os.path.join(pkg_plugins_dir, fname) if os.path.isfile(fpath) and ".so" in fname: - rpath = os.popen(f"patchelf --print-rpath {fpath}").read().strip() + res = run_command(["patchelf", "--print-rpath", fpath], capture_output=True, text=True, check=False) + rpath = res.stdout.strip() if res.returncode == 0 else "" if not rpath: rpath = "$ORIGIN/..:$ORIGIN" else: rpath = "$ORIGIN/..:$ORIGIN:" + rpath logger.debug(f"Setting rpath for {fpath} to {rpath}") - ret = os.system(f"patchelf --set-rpath '{rpath}' {fpath}") - if ret != 0: - raise RuntimeError(f"Failed to set rpath for {fpath}") + run_command(["patchelf", "--set-rpath", rpath, fpath]) + # Replace the original libs with the patched one for libname, _ in get_lib_deps(fpath).items(): # "libuct.so.0" -> "libuct" @@ -269,9 +276,8 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): if base_name in name_map: packaged_name = name_map[base_name] logger.debug(f"Replacing {libname} with {packaged_name} in {fpath}") - ret = os.system(f"patchelf --replace-needed {libname} {packaged_name} {fpath}") - if ret != 0: - raise RuntimeError(f"Failed to replace {libname} with {packaged_name} in {fpath}") + run_command(["patchelf", "--replace-needed", libname, packaged_name, fpath]) + # Check that there is no breakage introduced in the patched lib logger.debug(f"Checking that {fpath} loads") original_deps = get_lib_deps(os.path.join(sys_plugins_dir, fname)) From 3fd271cb56ee6d1b91f1281378005ac6ccf5ffd5 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 13:57:45 -0800 Subject: [PATCH 6/7] Minor comment on where plugin bundle wheel functions come from Signed-off-by: Daniel Huang --- install_nixl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/install_nixl.py b/install_nixl.py index 05e3ee018..f36901d4c 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -97,6 +97,7 @@ def install_system_dependencies(): # --- Wheel Manipulation Helpers --- +# Adapted from https://github.com/ai-dynamo/nixl/blob/b20598f2906b7037f711060b937e7156bb83f27f/contrib/wheel_add_ucx_plugins.py def extract_wheel(wheel_path): """ Extract the wheel to a temporary directory. From 954cbd1ae132d3721b2d74ac7393ca1ff23b4bb6 Mon Sep 17 00:00:00 2001 From: Daniel Huang Date: Thu, 8 Jan 2026 14:18:09 -0800 Subject: [PATCH 7/7] Fix pre-commit checks Signed-off-by: Daniel Huang --- install_nixl.py | 57 +++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/install_nixl.py b/install_nixl.py index f36901d4c..364b7f41c 100644 --- a/install_nixl.py +++ b/install_nixl.py @@ -37,7 +37,7 @@ def run_command(command, cwd=".", env=None, **kwargs): Returns: subprocess.CompletedProcess: The result object containing returncode, stdout, etc. """ - logger.info(f"--> Running command: {' '.join(command)} in '{cwd}'") + logger.info("--> Running command: %s in '%s'", " ".join(command), cwd) # Default to check=True to raise exception on error, matching old check_call behavior if "check" not in kwargs: kwargs["check"] = True @@ -74,8 +74,7 @@ def install_system_dependencies(): "WARNING: Not running as root. Skipping system dependency installation.\n" "Please ensure the following packages are installed on your system:\n" " patchelf build-essential git cmake ninja-build autotools-dev automake meson libtool libtool-bin\n" - "---\n" - ) + "---\n") return logger.info("--- Running as root. Installing system dependencies... ---") @@ -105,7 +104,7 @@ def extract_wheel(wheel_path): Path to the temporary directory. The caller is responsible for cleaning up the directory. """ temp_dir = tempfile.mkdtemp() - logger.info(f"Extracting wheel {wheel_path} to {temp_dir}") + logger.info("Extracting wheel %s to %s", wheel_path, temp_dir) with zipfile.ZipFile(wheel_path, "r") as zip_ref: zip_ref.extractall(temp_dir) return temp_dir @@ -155,7 +154,7 @@ def create_wheel(wheel_path, temp_dir): """ Create a wheel from a temporary directory. """ - logger.info(f"Creating wheel {wheel_path} from {temp_dir}") + logger.info("Creating wheel %s from %s", wheel_path, temp_dir) update_wheel_record_file(temp_dir) with zipfile.ZipFile(wheel_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zip_ref: for root, _, files in os.walk(temp_dir): @@ -177,7 +176,7 @@ def get_repaired_lib_name_map(libs_dir): if os.path.isfile(os.path.join(libs_dir, fname)) and ".so" in fname and "-" in fname: base_name = fname.split("-")[0] name_map[base_name] = fname - logger.info(f"Found already bundled lib: {base_name} -> {fname}") + logger.info("Found already bundled lib: %s -> %s", base_name, fname) return name_map @@ -243,31 +242,25 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): rpath = res.stdout.strip() if res.returncode == 0 else "" if "$ORIGIN" in rpath.split(":"): continue - if not rpath: - rpath = "$ORIGIN" - else: - rpath = "$ORIGIN:" + rpath - logger.debug(f"Setting rpath for {fpath} to {rpath}") + rpath = "$ORIGIN" if not rpath else "$ORIGIN:" + rpath + logger.debug("Setting rpath for %s to %s", fpath, rpath) run_command(["patchelf", "--set-rpath", rpath, fpath]) pkg_plugins_dir = os.path.join(pkg_libs_dir, install_dirname) - logger.debug(f"Copying plugins from {sys_plugins_dir} to {pkg_plugins_dir}") + logger.debug("Copying plugins from %s to %s", sys_plugins_dir, pkg_plugins_dir) copied_files = copytree(sys_plugins_dir, pkg_plugins_dir) if not copied_files: raise RuntimeError(f"No plugins found in {sys_plugins_dir}") # Patch all libs to load plugin deps from the wheel for fname in copied_files: - logger.debug(f"Patching {fname}") + logger.debug("Patching %s", fname) fpath = os.path.join(pkg_plugins_dir, fname) if os.path.isfile(fpath) and ".so" in fname: res = run_command(["patchelf", "--print-rpath", fpath], capture_output=True, text=True, check=False) rpath = res.stdout.strip() if res.returncode == 0 else "" - if not rpath: - rpath = "$ORIGIN/..:$ORIGIN" - else: - rpath = "$ORIGIN/..:$ORIGIN:" + rpath - logger.debug(f"Setting rpath for {fpath} to {rpath}") + rpath = "$ORIGIN/..:$ORIGIN" if not rpath else "$ORIGIN/..:$ORIGIN:" + rpath + logger.debug("Setting rpath for %s to %s", fpath, rpath) run_command(["patchelf", "--set-rpath", rpath, fpath]) # Replace the original libs with the patched one @@ -276,20 +269,19 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname): base_name = libname.split(".")[0] if base_name in name_map: packaged_name = name_map[base_name] - logger.debug(f"Replacing {libname} with {packaged_name} in {fpath}") + logger.debug("Replacing %s with %s in %s", libname, packaged_name, fpath) run_command(["patchelf", "--replace-needed", libname, packaged_name, fpath]) # Check that there is no breakage introduced in the patched lib - logger.debug(f"Checking that {fpath} loads") + logger.debug("Checking that %s loads", fpath) original_deps = get_lib_deps(os.path.join(sys_plugins_dir, fname)) for libname, libpath in get_lib_deps(fpath).items(): - if libpath is None: - if libname not in original_deps or original_deps[libname] is not None: - raise RuntimeError(f"Library {libname} not loaded by {fpath}") + if libpath is None and (libname not in original_deps or original_deps[libname] is not None): + raise RuntimeError(f"Library {libname} not loaded by {fpath}") create_wheel(wheel_path, temp_dir) shutil.rmtree(temp_dir) - logger.info(f"Added plugins to wheel: {wheel_path}") + logger.info("Added plugins to wheel: %s", wheel_path) def build_and_install_prerequisites(args): @@ -302,7 +294,7 @@ def build_and_install_prerequisites(args): cached_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME) if not args.force_reinstall and cached_wheel: - logger.info(f"\n--> Found self-contained wheel: {os.path.basename(cached_wheel)}.") + logger.info("\n--> Found self-contained wheel: %s.", os.path.basename(cached_wheel)) logger.info("--> Installing from cache, skipping all source builds.") install_command = [sys.executable, "-m", "pip", "install", cached_wheel] run_command(install_command) @@ -314,7 +306,7 @@ def build_and_install_prerequisites(args): run_command([sys.executable, "-m", "pip", "install", "auditwheel"]) install_system_dependencies() ucx_install_path = os.path.abspath(UCX_INSTALL_DIR) - logger.info(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}") + logger.info("--> Using wheel cache directory: %s", WHEELS_CACHE_HOME) os.makedirs(WHEELS_CACHE_HOME, exist_ok=True) # -- Step 1: Build UCX from source -- @@ -351,7 +343,7 @@ def build_and_install_prerequisites(args): else: run_command(["git", "fetch", "--tags"], cwd=NIXL_DIR) run_command(["git", "checkout", NIXL_VERSION], cwd=NIXL_DIR) - logger.info(f"--> Checked out NIXL version: {NIXL_VERSION}") + logger.info("--> Checked out NIXL version: %s", NIXL_VERSION) build_env = os.environ.copy() build_env["PKG_CONFIG_PATH"] = os.path.join(ucx_install_path, "lib", "pkgconfig") @@ -359,7 +351,7 @@ def build_and_install_prerequisites(args): ucx_plugin_path = os.path.join(ucx_lib_path, "ucx") existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "") build_env["LD_LIBRARY_PATH"] = f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":") - logger.info(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}") + logger.info("--> Using LD_LIBRARY_PATH: %s", build_env["LD_LIBRARY_PATH"]) temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse") run_command( @@ -406,11 +398,11 @@ def build_and_install_prerequisites(args): ucx_plugins_src = os.path.join(ucx_install_path, "lib", "ucx") if os.path.exists(ucx_plugins_src): - logger.info(f"--> Adding plugins from {ucx_plugins_src}") + logger.info("--> Adding plugins from %s", ucx_plugins_src) # Direct call to the ported function add_plugins(repaired_wheel, ucx_plugins_src, "ucx") else: - logger.info(f"--> Warning: UCX plugins not found. Skipping bundling.") + logger.info("--> Warning: UCX plugins not found. Skipping bundling.") # No more temporary files to remove, just the temp wheelhouse run_command(["rm", "-rf", temp_wheel_dir]) @@ -419,9 +411,8 @@ def build_and_install_prerequisites(args): if not newly_built_wheel: raise RuntimeError("Failed to find the repaired NIXL wheel.") - logger.info( - f"--> Successfully built self-contained wheel: {os.path.basename(newly_built_wheel)}. Now installing..." - ) + logger.info("--> Successfully built self-contained wheel: %s. Now installing...", + os.path.basename(newly_built_wheel)) install_command = [ sys.executable, "-m",