diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f7bcfd4f..7a53af80 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -28,7 +28,7 @@ steps: agents: queue: "juliagpu" cuda: "*" - if: build.message !~ /\[skip tests\]/ + if: build.message !~ /\[skip tests\]/ && (build.branch == "main" || build.pull_request.base_branch == "main" || build.message !~ /\[skip jll\]/) timeout_in_minutes: 44 env: LD_LIBRARY_PATH: "" diff --git a/.githash b/.githash deleted file mode 100644 index 6cdfeea5..00000000 --- a/.githash +++ /dev/null @@ -1 +0,0 @@ -dec047f1bd1c8287513c6c437f946982e516ccd4 diff --git a/.github/scripts/check_versions.py b/.github/scripts/check_versions.py new file mode 100644 index 00000000..0d8926aa --- /dev/null +++ b/.github/scripts/check_versions.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python3 +"""Version consistency check for pull requests targeting main.""" + +import argparse +import re +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, check=True, +).stdout.strip()) + +# To reuse this script for another package (e.g. Legate.jl), change only this block. 
+ +PACKAGE_NAME = "cuNumeric" + +WRAPPER_VERSION_FILE = "lib/cunumeric_jl_wrapper/VERSION" +WRAPPER_JLL_COMPAT_KEY = "cunumeric_jl_wrapper_jll" +WRAPPER_SRC_PREFIXES = ( + "lib/cunumeric_jl_wrapper/src/", + "lib/cunumeric_jl_wrapper/include/", + "lib/cunumeric_jl_wrapper/CMakeLists.txt", +) + +SUBPKG_TOML_PATH = "lib/CNPreferences/Project.toml" +SUBPKG_COMPAT_KEY = "CNPreferences" +SUBPKG_SRC_PREFIXES = ( + "lib/CNPreferences/src/", +) + + +def parse_version(v: str) -> tuple: + return tuple(int(x) for x in v.strip().split(".")) + + +def version_gt(a: str, b: str) -> bool: + return parse_version(a) > parse_version(b) + + +def git_show(ref: str, path: str) -> str: + result = subprocess.run( + ["git", "show", f"{ref}:{path}"], + capture_output=True, text=True, check=True, + cwd=REPO_ROOT, + ) + return result.stdout + + +def changed_files(base_ref: str) -> list: + result = subprocess.run( + ["git", "diff", "--name-only", f"{base_ref}...HEAD"], + capture_output=True, text=True, check=True, + cwd=REPO_ROOT, + ) + return result.stdout.splitlines() + + +def parse_top_level_version(toml_text: str) -> str: + for line in toml_text.splitlines(): + stripped = line.strip() + if re.match(r"^\[", stripped): + break + m = re.match(r'^version\s*=\s*"([^"]+)"', stripped) + if m: + return m.group(1) + raise ValueError("Could not find top-level version in TOML") + + +def parse_compat_section(toml_text: str) -> dict: + in_compat = False + result = {} + for line in toml_text.splitlines(): + stripped = line.strip() + if re.match(r"^\[", stripped): + in_compat = stripped == "[compat]" + continue + if in_compat and stripped and not stripped.startswith("#"): + m = re.match(r'^([\w_-]+)\s*=\s*"([^"]+)"', stripped) + if m: + result[m.group(1)] = m.group(2) + return result + + +def check_package_version(base_ref: str, pr_toml: str, errors: list): + main_toml = git_show(base_ref, "Project.toml") + main_ver = parse_top_level_version(main_toml) + pr_ver = parse_top_level_version(pr_toml) + + 
print(f"\t[{PACKAGE_NAME}]\tmain={main_ver}\tpr={pr_ver}") + if not version_gt(pr_ver, main_ver): + errors.append( + f"{PACKAGE_NAME} version must be greater than main.\n" + f"\tmain: {main_ver} → pr: {pr_ver}\n" + f"\tBump the version in Project.toml (patch, minor, or major)." + ) + else: + print(f"\t\tOK ({main_ver} → {pr_ver})") + + +def check_wrapper_version(base_ref: str, changed: list, errors: list): + src_changed = [f for f in changed if any(f.startswith(p) for p in WRAPPER_SRC_PREFIXES)] + if not src_changed: + print("\t[wrapper]\tno source changes — skipping") + return + + print(f"\t[wrapper]\t{len(src_changed)} source file(s) changed:") + for f in src_changed: + print(f"\t\t{f}") + + pr_ver = (REPO_ROOT / WRAPPER_VERSION_FILE).read_text().strip() + try: + main_ver = git_show(base_ref, WRAPPER_VERSION_FILE).strip() + except subprocess.CalledProcessError: + main_ver = "0.0.0" + + print(f"\t\tVERSION\tmain={main_ver}\tpr={pr_ver}") + if not version_gt(pr_ver, main_ver): + errors.append( + f"{WRAPPER_VERSION_FILE} must be incremented when wrapper source changes.\n" + f"\tmain: {main_ver} → pr: {pr_ver}\n" + f"\tBump {WRAPPER_VERSION_FILE}." + ) + else: + print(f"\t\tOK VERSION ({main_ver} → {pr_ver})") + + +def check_wrapper_compat_sync(pr_toml: str, errors: list): + wrapper_ver = (REPO_ROOT / WRAPPER_VERSION_FILE).read_text().strip() + compat_ver = parse_compat_section(pr_toml).get(WRAPPER_JLL_COMPAT_KEY) + + lhs = WRAPPER_VERSION_FILE + rhs = f"Project.toml [compat] {WRAPPER_JLL_COMPAT_KEY}" + w = max(len(lhs), len(rhs)) + print(f"\t[wrapper compat sync]") + print(f"\t\t{lhs:<{w}} = {wrapper_ver}") + print(f"\t\t{rhs:<{w}} = {compat_ver}") + if compat_ver is None: + errors.append( + f"{WRAPPER_JLL_COMPAT_KEY} not found in Project.toml [compat]." 
+ ) + elif compat_ver != wrapper_ver: + errors.append( + f"Project.toml [compat] {WRAPPER_JLL_COMPAT_KEY}={compat_ver} does not match {WRAPPER_VERSION_FILE}={wrapper_ver}.\n" + f"\tSet {WRAPPER_JLL_COMPAT_KEY} = \"{wrapper_ver}\" in Project.toml [compat]." + ) + else: + print(f"\t\tOK") + + +def check_subpkg_version(base_ref: str, pr_toml: str, changed: list, errors: list): + src_changed = [f for f in changed if any(f.startswith(p) for p in SUBPKG_SRC_PREFIXES)] + if not src_changed: + print(f"\t[{SUBPKG_COMPAT_KEY}]\tno source changes — skipping") + return + + print(f"\t[{SUBPKG_COMPAT_KEY}]\t{len(src_changed)} source file(s) changed:") + for f in src_changed: + print(f"\t\t{f}") + + pr_subpkg_toml = (REPO_ROOT / SUBPKG_TOML_PATH).read_text() + try: + main_subpkg_toml = git_show(base_ref, SUBPKG_TOML_PATH) + except subprocess.CalledProcessError: + main_subpkg_toml = 'version = "0.0.0"' + + pr_ver = parse_top_level_version(pr_subpkg_toml) + main_ver = parse_top_level_version(main_subpkg_toml) + + print(f"\t\tversion\tmain={main_ver}\tpr={pr_ver}") + if not version_gt(pr_ver, main_ver): + errors.append( + f"{SUBPKG_TOML_PATH} version must be incremented when {SUBPKG_COMPAT_KEY} source changes.\n" + f"\tmain: {main_ver} → pr: {pr_ver}\n" + f"\tBump the version in {SUBPKG_TOML_PATH}." + ) + else: + print(f"\t\tOK version ({main_ver} → {pr_ver})") + + compat_ver = parse_compat_section(pr_toml).get(SUBPKG_COMPAT_KEY) + if compat_ver is None: + errors.append( + f"{SUBPKG_COMPAT_KEY} not found in Project.toml [compat]." + ) + elif compat_ver != pr_ver: + errors.append( + f"Project.toml [compat] {SUBPKG_COMPAT_KEY}={compat_ver} does not match {SUBPKG_TOML_PATH} version={pr_ver}.\n" + f"\tSet {SUBPKG_COMPAT_KEY} = \"{pr_ver}\" in Project.toml [compat]." 
+ ) + else: + print(f"\t\tOK compat {SUBPKG_COMPAT_KEY}={compat_ver}") + + +def main(): + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("--base-ref", default="origin/main", + help="Git ref to compare against (default: origin/main)") + args = parser.parse_args() + + base_ref = args.base_ref + errors = [] + + pr_toml = (REPO_ROOT / "Project.toml").read_text() + changed = changed_files(base_ref) + + print("Version check") + print("─" * 60) + check_package_version(base_ref, pr_toml, errors) + check_wrapper_version(base_ref, changed, errors) + check_wrapper_compat_sync(pr_toml, errors) + check_subpkg_version(base_ref, pr_toml, changed, errors) + print("─" * 60) + + if errors: + print(f"\nFAILED — {len(errors)} error(s):\n") + for i, e in enumerate(errors, 1): + print(f"\t{i}. {e}\n") + sys.exit(1) + else: + print("\nPASSED") + + +if __name__ == "__main__": + main() diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0daab8c4..f7383316 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,8 +19,6 @@ on: - 'deps/build.jl' - 'Project.toml' - 'lib/CNPreferences/src/**' - - '.github/workflows/ci.yml' - - 'Dockerfile' # container.yml depends on this tags: - 'v*' branches: @@ -32,11 +30,28 @@ on: - 'deps/build.jl' - 'Project.toml' - 'lib/CNPreferences/src/**' - - '.github/workflows/ci.yml' - - 'Dockerfile' # container.yml depends on this jobs: + pkg_resolve: + uses: ./.github/workflows/pkg_resolve.yml + + check_changes: + name: Check for wrapper changes + runs-on: ubuntu-latest + outputs: + wrapper_changed: ${{ steps.filter.outputs.wrapper }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + wrapper: + - 'lib/cunumeric_jl_wrapper/**' + test: name: Julia ${{ matrix.julia }} - ${{ matrix.os }} + needs: [pkg_resolve, check_changes] + if: ${{ github.base_ref == 'main' || 
needs.check_changes.outputs.wrapper_changed != 'true' }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -61,6 +76,37 @@ jobs: - uses: julia-actions/cache@v2 + - name: Detect Legate branch override + id: legate-branch + run: | + MSG=$(printf '%s\n%s' \ + "${{ github.event.head_commit.message }}" \ + "${{ github.event.pull_request.body }}") + BRANCH=$(echo "$MSG" | grep -oiP 'legate[-_]branch:\s*\K\S+' | head -1 | tr -d '[]' || true) + echo "branch=${BRANCH}" >> $GITHUB_OUTPUT + if [[ -n "$BRANCH" ]]; then + echo "Using Legate.jl branch override: $BRANCH" + else + echo "No Legate.jl branch override — using JLL" + fi + + - name: Checkout Legate.jl branch override + if: steps.legate-branch.outputs.branch != '' + uses: actions/checkout@v4 + with: + repository: ${{ github.repository_owner }}/Legate.jl + ref: ${{ steps.legate-branch.outputs.branch }} + path: Legate.jl + + - name: Develop Legate.jl branch override + if: steps.legate-branch.outputs.branch != '' + run: | + julia --color=yes -e ' + using Pkg; + Pkg.develop(PackageSpec(path = "Legate.jl/lib/LegatePreferences")) + Pkg.develop(PackageSpec(path = "Legate.jl")) + ' + - name: Run tests env: GPUTESTS: "0" # parsed by runtests.jl diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index 00d5d995..07a68c92 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -18,6 +18,7 @@ on: - main jobs: push_to_registry: + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} name: Container for ${{ matrix.platform }} - Julia ${{ matrix.julia }} - CUDA ${{ matrix.cuda }} permissions: contents: read diff --git a/.github/workflows/developer.yml b/.github/workflows/developer.yml index 10c5094b..ebeafdd1 100644 --- a/.github/workflows/developer.yml +++ b/.github/workflows/developer.yml @@ -38,8 +38,12 @@ on: - 'lib/CNPreferences/src/**' - '.github/workflows/developer.yml' jobs: + pkg_resolve: + uses: 
./.github/workflows/pkg_resolve.yml + docs: - name : Developer CI test + name: Developer CI test + needs: pkg_resolve permissions: contents: read packages: write @@ -55,7 +59,7 @@ jobs: shell: bash env: LEGATE_AUTO_CONFIG: 0 - NO_CUDA: ON + LEGATE_WRAPPER_ENABLE_CUDA: OFF steps: - name: Check out the repo uses: actions/checkout@v4 @@ -87,6 +91,36 @@ jobs: with: cache-name: julia-developer-ci + # Parse commit message or PR body for [legate-branch: ]. + # Example: include "[legate-branch: my-feature]" anywhere in the message. + - name: Detect Legate branch override + id: legate-branch + run: | + MSG=$(printf '%s\n%s' \ + "${{ github.event.head_commit.message }}" \ + "${{ github.event.pull_request.body }}") + BRANCH=$(echo "$MSG" | grep -oiP 'legate[-_]branch:\s*\K\S+' | head -1 | tr -d '[]' || true) + echo "branch=${BRANCH}" >> $GITHUB_OUTPUT + if [[ -n "$BRANCH" ]]; then + echo "Using Legate.jl branch override: $BRANCH" + else + echo "No Legate.jl branch override — using JLL" + fi + + - name: Clone and develop Legate.jl branch override + if: steps.legate-branch.outputs.branch != '' + run: | + git clone --depth 1 --branch ${{ steps.legate-branch.outputs.branch }} \ + ${{ github.server_url }}/${{ github.repository_owner }}/Legate.jl.git \ + /tmp/Legate.jl + julia --color=yes -e ' + using Pkg; + Pkg.develop(PackageSpec(path = "/tmp/Legate.jl/lib/LegatePreferences")) + Pkg.develop(PackageSpec(path = "/tmp/Legate.jl")) + using LegatePreferences; LegatePreferences.use_developer_mode(); + Pkg.build("Legate") + ' + - name: Setup cuNumeric.jl with build from src wrappers run: | julia --color=yes -e ' @@ -95,6 +129,11 @@ jobs: using CNPreferences; CNPreferences.use_developer_mode(); Pkg.develop(PackageSpec(path = ".")) ' + julia --color=yes --project=test -e ' + using Pkg; + Pkg.develop(PackageSpec(path = "lib/CNPreferences")) + using CNPreferences; CNPreferences.use_developer_mode(); + ' julia --color=yes -e 'using Pkg; Pkg.build("cuNumeric")' - name: Perform Test diff --git 
a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e3322451..a688ae08 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -21,8 +21,24 @@ on: - 'docs/Project.toml' - 'README.md' jobs: + check_changes: + name: Check for wrapper changes + runs-on: ubuntu-latest + outputs: + wrapper_changed: ${{ steps.filter.outputs.wrapper }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + wrapper: + - 'lib/cunumeric_jl_wrapper/**' + docs: name : Documentation + needs: check_changes + if: ${{ github.base_ref == 'main' || needs.check_changes.outputs.wrapper_changed != 'true' }} permissions: actions: write contents: write diff --git a/.github/workflows/pkg_resolve.yml b/.github/workflows/pkg_resolve.yml new file mode 100644 index 00000000..47c5302d --- /dev/null +++ b/.github/workflows/pkg_resolve.yml @@ -0,0 +1,22 @@ +name: Pkg Resolve + +on: + workflow_call: + +jobs: + resolve: + name: Pkg.resolve + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: julia-actions/setup-julia@v2 + with: + version: '1.10' + + - uses: julia-actions/cache@v2 + + - name: Pkg.resolve + run: | + rm -f Manifest.toml + julia --project -e 'using Pkg; Pkg.Registry.add("General"); Pkg.resolve()' diff --git a/.github/workflows/version_check.yml b/.github/workflows/version_check.yml new file mode 100644 index 00000000..346bc7d6 --- /dev/null +++ b/.github/workflows/version_check.yml @@ -0,0 +1,25 @@ +name: Version Check + +on: + pull_request: + branches: + - main + +jobs: + version-check: + name: Version Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch main branch + run: git fetch origin main + + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Run version checks + run: python .github/scripts/check_versions.py diff --git a/.gitignore b/.gitignore index 0046a57e..3b09cfb4 100644 --- a/.gitignore +++ 
b/.gitignore @@ -29,6 +29,10 @@ kernel.ptx *.data* .envrc +# Generated by build.jl — do not commit +dev/Project.toml +dev/Manifest.toml + # Generated by build process deps/cupynumeric-* libcupynumeric @@ -82,3 +86,4 @@ node_modules *.exe *.out *.app +.githash diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 37b95947..03d02289 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,12 +22,3 @@ repos: entry: julia --project=. -e 'using JuliaFormatter; format(ARGS, verbose=true)' language: system types: [julia] - - - id: update-githash - name: Update .githash - description: "Write the current git commit hash to .githash" - entry: bash -c 'git rev-parse HEAD > .githash' - language: system - always_run: true - pass_filenames: false - stages: [post-commit] diff --git a/Project.toml b/Project.toml index 535b8060..c898d6b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,13 @@ name = "cuNumeric" uuid = "0fd9ffd4-7e84-4cd0-b8f8-645bd8c73620" -version = "0.1.0" +version = "0.1.1" + +[workspace] +projects = ["test", "dev"] [deps] CNPreferences = "3e078157-ea10-49d5-bf32-908f777cd46f" +CUDA_SDK_jll = "6cbf2f2e-7e60-5632-ac76-dca2274e0be0" CxxWrap = "1f15a43c-97ca-5a2a-ae31-89f07a497df4" JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" Legate = "1238f2cf-6593-4d60-9aca-2f5364e49909" @@ -29,6 +33,7 @@ CUDAExt = "CUDA" [compat] CNPreferences = "0.1.2" CUDA = "5.9" +CUDA_SDK_jll = "13" CxxWrap = "0.17" JuliaFormatter = "2.3.0" Legate = "0.1.2" diff --git a/deps/build.jl b/deps/build.jl index d53e576f..bacfa699 100644 --- a/deps/build.jl +++ b/deps/build.jl @@ -17,124 +17,29 @@ * Ethan Meitz =# +using Pkg using Preferences using Legate using CNPreferences -include("version.jl") - -up_dir(dir::String) = abspath(joinpath(dir, "..")) - -# Automatically pipes errors to new file -# and appends stdout to build.log -function run_sh(cmd::Cmd, filename::String) - println(cmd) - - build_log = joinpath(@__DIR__, "build.log") - tmp_build_log 
= joinpath(@__DIR__, "$(filename).log") - err_log = joinpath(@__DIR__, "$(filename).err") - - if isfile(err_log) - rm(err_log) - end - - if isfile(tmp_build_log) - rm(tmp_build_log) - end - - try - run(pipeline(cmd; stdout=tmp_build_log, stderr=err_log, append=false)) - contents = read(tmp_build_log, String) - open(build_log, "a") do io - println(contents) - end - catch e - println("stderr log generated: ", err_log, '\n') - contents = read(err_log, String) - if !isempty(strip(contents)) - println("---- Begin stderr log ----") - println(contents) - println("---- End stderr log ----") - end - end -end - -function build_jlcxxwrap(repo_root, cupynumeric_root) - build_libcxxwrap = joinpath(repo_root, "scripts/install_cxxwrap.sh") - version_path = joinpath(DEPOT_PATH[1], "dev/libcxxwrap_julia_jll/override/LEGATE_INSTALL.txt") - if isfile(version_path) - version = VersionNumber(strip(read(version_path, String))) - @info "libcxxwrap: Found cuNumeric $version" - if is_supported_version(version) - @info "libcxxwrap: Found supported version built with cuNumeric.jl: $version" - return nothing - else - @info "libcxxwrap: Unsupported version found: $version. Rebuilding..." - end - else - @info "libcxxwrap: No version file found. Starting build..." 
- end +const BuildTools = Legate.BuildTools - @info "libcxxwrap: Running build script: $build_libcxxwrap" - run_sh(`bash $build_libcxxwrap $repo_root`, "libcxxwrap") - mkpath(dirname(version_path)) - open(version_path, "w") do io - write(io, string(get_cupynumeric_version(cupynumeric_root))) - end -end +include("version.jl") function build_cpp_wrapper( - repo_root, cupynumeric_loc, legate_loc, blas_loc, install_root + repo_root, cupynumeric_loc, legate_loc, blas_loc, install_root; + cuda_root=nothing, cuda_enabled=true, ) @info "libcunumeric_jl_wrapper: Building C++ Wrapper Library" - if isdir(install_root) - rm(install_root; recursive=true) - mkdir(install_root) - end - - build_cpp_wrapper = joinpath(repo_root, "scripts/build_cpp_wrapper.sh") - nthreads = Threads.nthreads() - - bld_command = `$build_cpp_wrapper $repo_root $cupynumeric_loc $legate_loc $blas_loc $install_root $nthreads` - - # write out a bash script for debugging - cmd_str = join(bld_command.exec, " ") - wrapper_path = joinpath(repo_root, "build_wrapper.sh") - open(wrapper_path, "w") do io - println(io, "#!/bin/bash") - println(io, "set -xe") - println(io, cmd_str) - end - chmod(wrapper_path, 0o755) - - @info "Running build command: $bld_command" - run_sh(`bash $bld_command`, "cpp_wrapper") + isdir(install_root) && (rm(install_root; recursive=true); mkdir(install_root)) + bld_command = `$(joinpath(repo_root, "scripts/build_cpp_wrapper.sh")) $repo_root $cupynumeric_loc $legate_loc $blas_loc $install_root $(Threads.nthreads())` + BuildTools.run_build_wrapper_script( + repo_root, bld_command; cuda_root, cuda_enabled, log_dir=@__DIR__ + ) end -function _find_jll_artifact_dir(jll) - eval(:(using $(jll))) - jll_mod = getfield(Main, jll) - root = jll_mod.artifact_dir - return root -end - -function _start_build() - pkg_root = up_dir(@__DIR__) - deps_dir = joinpath(@__DIR__) - - build_log = joinpath(deps_dir, "build.log") - open(build_log, "w") do io - println(io, "=== Build started ===") - end - - @info 
"cuNumeric.jl: Parsed Package Dir as: $(pkg_root)" - return pkg_root -end - -""" - build CxxWrap and cunumeric_jl_wrapper -""" -function build_deps(pkg_root, cupynumeric_root, blas_root) +function build_deps(pkg_root, cupynumeric_root, blas_root; cuda_root=nothing, cuda_enabled=true) + BuildTools.check_cmake_version(Legate.MIN_CMAKE_VERSION) legate_lib = Legate.get_install_liblegate() install_lib = joinpath(pkg_root, "lib", "cunumeric_jl_wrapper", "build") if !cupynumeric_valid(cupynumeric_root) @@ -144,11 +49,17 @@ function build_deps(pkg_root, cupynumeric_root, blas_root) "$(MIN_CUNUMERIC_VERSION)-$(MAX_CUNUMERIC_VERSION).", ) end - build_jlcxxwrap(pkg_root, cupynumeric_root) + + BuildTools.build_jlcxxwrap( + pkg_root, get_cupynumeric_version(cupynumeric_root); + log_dir=@__DIR__, is_compatible=is_supported_version, + ) build_cpp_wrapper( pkg_root, cupynumeric_root, up_dir(legate_lib), blas_root, - install_lib, - ) # $pkg_root/lib/cunumeric_jl_wrapper + install_lib; + cuda_root, cuda_enabled, + ) + BuildTools.set_jll_artifact_override(:cunumeric_jl_wrapper_jll, install_lib) end function build(::CNPreferences.JLL) @@ -158,7 +69,7 @@ end function build(::CNPreferences.Conda) @warn "Conda Build does not currently pass our CI. Proceed with caution." 
- pkg_root = _start_build() + pkg_root = BuildTools.start_build("cuNumeric.jl", @__DIR__) cupynumeric_root = load_preference(CNPreferences, "cunumeric_conda_env", nothing) if isnothing(cupynumeric_root) @@ -166,28 +77,27 @@ function build(::CNPreferences.Conda) end is_cupynumeric_installed(cupynumeric_root; throw_errors=true) - build_deps(pkg_root, cupynumeric_root, cupynumeric_root) # blas is same root as cupynumeric + build_deps(pkg_root, cupynumeric_root, cupynumeric_root) end function build(::CNPreferences.Developer) - pkg_root = _start_build() + pkg_root = BuildTools.start_build("cuNumeric.jl", @__DIR__) - # can be nothing so this errors if not set cupynumeric_root = load_preference(CNPreferences, "cunumeric_path", nothing) blas_lib = load_preference(CNPreferences, "BLAS_LIB", nothing) + if isnothing(cupynumeric_root) - # we are using cupynumeric_jll - cupynumeric_root = _find_jll_artifact_dir(:cupynumeric_jll) + cupynumeric_root, cuda_root = BuildTools.setup_jll_build_env( + pkg_root, BuildTools.CUNUMERIC_JLL_DEP + ) + cuda_enabled = !isnothing(cuda_root) # cuda_root resolving to nothing means there is no cuda else - # this means we have a custom path set is_cupynumeric_installed(cupynumeric_root; throw_errors=true) + cuda_enabled, cuda_root = BuildTools.resolve_custom_cuda("cupynumeric") # cuda_root is nothing. 
end - if isnothing(blas_lib) - blas_lib = _find_jll_artifact_dir(:OpenBLAS32_jll) - end - - build_deps(pkg_root, cupynumeric_root, up_dir(blas_lib)) + blas_lib = something(blas_lib, BuildTools.find_jll_artifact_dir(:OpenBLAS32_jll)) + build_deps(pkg_root, cupynumeric_root, up_dir(blas_lib); cuda_root, cuda_enabled) end const mode_str = load_preference(CNPreferences, "cunumeric_mode", CNPreferences.MODE_JLL) diff --git a/deps/version.jl b/deps/version.jl index 54e2e09b..68a8199b 100644 --- a/deps/version.jl +++ b/deps/version.jl @@ -17,31 +17,14 @@ * Ethan Meitz =# -const MIN_CUDA_VERSION = v"13.0" -const MAX_CUDA_VERSION = v"13.9.999" const MIN_CUNUMERIC_VERSION = v"25.10.00" const MAX_CUNUMERIC_VERSION = v"25.12.00" up_dir(dir::String) = abspath(joinpath(dir, "..")) -function get_version(version_file::String) - version = nothing - open(version_file, "r") do f - data = readlines(f) - major = parse(Int, split(data[end - 2])[end]) - minor = parse(Int, lpad(split(data[end - 1])[end], 2, '0')) - patch = parse(Int, lpad(split(data[end])[end], 2, '0')) - version = VersionNumber(major, minor, patch) - end - if isnothing(version) - error("cuNumeric.jl: Failed to parse version for $(version_file)") - end - return version -end - function get_cupynumeric_version(cupynumeric_root::String) version_file = joinpath(cupynumeric_root, "include", "cupynumeric", "version_config.hpp") - return get_version(version_file) + return Legate.BuildTools.get_version(version_file) end function is_supported_version(version::VersionNumber) diff --git a/ext/CUDAExt/cuda.jl b/ext/CUDAExt/cuda.jl index 05d7e9f1..33421640 100644 --- a/ext/CUDAExt/cuda.jl +++ b/ext/CUDAExt/cuda.jl @@ -76,7 +76,7 @@ end function nda_to_logical_array(arr::NDArray{T,N}) where {T,N} st_handle = cuNumeric.get_store(arr) - return Legate.LogicalArray{T,N}(st_handle[], size(arr)) + return Legate.LogicalArray{T,N}(st_handle, size(arr)) end function Launch(kernel::cuNumeric.CUDATask, inputs::Tuple{Vararg{NDArray}}, diff --git 
a/lib/cunumeric_jl_wrapper/CMakeLists.txt b/lib/cunumeric_jl_wrapper/CMakeLists.txt index 2da543ab..dd0841a1 100644 --- a/lib/cunumeric_jl_wrapper/CMakeLists.txt +++ b/lib/cunumeric_jl_wrapper/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required(VERSION 3.22.1 FATAL_ERROR) +cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) + project(cuNumericWrapper) set(cuNumericWrapperVersion 0.0.1) @@ -11,12 +12,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") -# ---- New: NOCUDA option ---- -option(NOCUDA "Build without CUDA support (skip CUDAToolkit and src/cuda.cpp)" OFF) +option(LEGATE_WRAPPER_ENABLE_CUDA "Build cunumeric_jl_wrapper with CUDA support" ON) option(BINARYBUILDER "Building with binary builder" ON) - -# Always needed (unless your packages themselves require CUDA at configure time) find_package(legate REQUIRED) find_package(cupynumeric REQUIRED) @@ -44,16 +42,14 @@ set(SOURCES src/types.cpp ) -# Conditionally add CUDA bits -if(NOT NOCUDA) +if(LEGATE_WRAPPER_ENABLE_CUDA) find_package(CUDAToolkit 13.0 REQUIRED) list(APPEND SOURCES src/cuda.cpp) - add_compile_definitions(HAVE_CUDA) - set(HAVE_CUDA TRUE) - message(STATUS "CUDA enabled: adding src/cuda.cpp") + message(STATUS "LEGATE_WRAPPER_ENABLE_CUDA=ON: adding src/cuda.cpp") else() - set(HAVE_CUDA FALSE) - message(STATUS "NOCUDA=ON: skipping CUDAToolkit and src/cuda.cpp") + # only disables find_package requirement for CUDAToolkit. + # if you have a CUDA enabled cuNumeric install, this really won't do anything. 
+ message(STATUS "LEGATE_WRAPPER_ENABLE_CUDA=OFF: skipping CUDAToolkit and src/cuda.cpp.") endif() # Library: C++ wrapper @@ -67,13 +63,11 @@ target_link_libraries(${CXX_CUNUMERICJL_WRAPPER} PRIVATE JlCxx::cxxwrap_julia_stl ) -# Include dirs (conditionally add CUDA include path) target_include_directories(${CXX_CUNUMERICJL_WRAPPER} PRIVATE include) -if(HAVE_CUDA) +if(LEGATE_WRAPPER_ENABLE_CUDA) target_include_directories(${CXX_CUNUMERICJL_WRAPPER} PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) endif() - install(TARGETS ${CXX_CUNUMERICJL_WRAPPER} DESTINATION lib) # ---- C API ---- @@ -91,7 +85,7 @@ target_link_libraries(${C_INTERFACE_LIB} PRIVATE ) target_include_directories(${C_INTERFACE_LIB} PRIVATE include) -if(HAVE_CUDA) +if(LEGATE_WRAPPER_ENABLE_CUDA) target_include_directories(${C_INTERFACE_LIB} PRIVATE ${CUDAToolkit_INCLUDE_DIRS}) endif() diff --git a/lib/cunumeric_jl_wrapper/VERSION b/lib/cunumeric_jl_wrapper/VERSION index f07e60ca..7cbc7d43 100644 --- a/lib/cunumeric_jl_wrapper/VERSION +++ b/lib/cunumeric_jl_wrapper/VERSION @@ -1 +1 @@ -25.10.3 +25.10.4 diff --git a/lib/cunumeric_jl_wrapper/include/types.h b/lib/cunumeric_jl_wrapper/include/types.h index c75754db..88a18dc9 100644 --- a/lib/cunumeric_jl_wrapper/include/types.h +++ b/lib/cunumeric_jl_wrapper/include/types.h @@ -65,3 +65,6 @@ void wrap_unary_reds(jlcxx::Module&); // Binary op codes void wrap_binary_ops(jlcxx::Module&); + +// Linear algebra op codes +void wrap_linalg_ops(jlcxx::Module& mod); diff --git a/lib/cunumeric_jl_wrapper/include/ufi.h b/lib/cunumeric_jl_wrapper/include/ufi.h index f5558c16..b132dd66 100644 --- a/lib/cunumeric_jl_wrapper/include/ufi.h +++ b/lib/cunumeric_jl_wrapper/include/ufi.h @@ -1,6 +1,6 @@ /* Copyright 2026 Northwestern University, * Carnegie Mellon University University - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -23,7 +23,7 @@ #include "jlcxx/jlcxx.hpp" #include "legate.h" -#ifdef HAVE_CUDA +#if LEGATE_DEFINED(LEGATE_USE_CUDA) namespace ufi { enum TaskIDs { LOAD_PTX_TASK = 143432, diff --git a/lib/cunumeric_jl_wrapper/src/ndarray.cpp b/lib/cunumeric_jl_wrapper/src/ndarray.cpp index 47cdf3b0..ed157b07 100644 --- a/lib/cunumeric_jl_wrapper/src/ndarray.cpp +++ b/lib/cunumeric_jl_wrapper/src/ndarray.cpp @@ -237,6 +237,23 @@ void nda_unary_reduction(CN_NDArray* out, CuPyNumericUnaryRedCode op_code, out->obj.unary_reduction(op_code, input->obj); } +CN_NDArray* nda_unary_reduction_axes(CuPyNumericUnaryRedCode op_code, CN_NDArray* input, const int32_t* axes, int32_t num_axes, bool keepdims) { + std::vector axis_vec(axes, axes + num_axes); + NDArray result = input->obj._perform_unary_reduction( + static_cast(op_code), + input->obj, + axis_vec, + std::nullopt, // dtype + std::nullopt, // res_dtype + std::nullopt, // out + keepdims, + {}, // args + std::nullopt, // initial + std::nullopt // where + ); + return new CN_NDArray{NDArray(std::move(result))}; +} + NDArray get_slice(NDArray arr, std::vector slices) { switch (slices.size()) { case 1: { diff --git a/lib/cunumeric_jl_wrapper/src/types.cpp b/lib/cunumeric_jl_wrapper/src/types.cpp index 2e73ebd5..f181dc2e 100644 --- a/lib/cunumeric_jl_wrapper/src/types.cpp +++ b/lib/cunumeric_jl_wrapper/src/types.cpp @@ -162,3 +162,8 @@ void wrap_binary_ops(jlcxx::Module& mod) { mod.set_const("SUBTRACT", CuPyNumericBinaryOpCode::CUPYNUMERIC_BINOP_SUBTRACT); } + +void wrap_linalg_ops(jlcxx::Module& mod) { + mod.set_const("SOLVE", legate::LocalTaskID{CuPyNumericOpCode::CUPYNUMERIC_SOLVE}); + mod.set_const("MP_SOLVE", legate::LocalTaskID{CuPyNumericOpCode::CUPYNUMERIC_MP_SOLVE}); +} \ No newline at end of file diff --git a/lib/cunumeric_jl_wrapper/src/wrapper.cpp b/lib/cunumeric_jl_wrapper/src/wrapper.cpp index 29334333..c5c792fa 100644 --- a/lib/cunumeric_jl_wrapper/src/wrapper.cpp +++ 
b/lib/cunumeric_jl_wrapper/src/wrapper.cpp @@ -42,10 +42,7 @@ struct WrapCppOptional { } }; -legate::LogicalArray* get_store(CN_NDArray* arr) { - auto res = arr->obj.get_store(); - return new legate::LogicalArray(std::move(res)); -} +legate::LogicalArray get_store(CN_NDArray* arr) { return arr->obj.get_store(); } legate::Library get_lib() { auto runtime = cupynumeric::CuPyNumericRuntime::get_runtime(); @@ -68,6 +65,7 @@ JLCXX_MODULE define_julia_module(jlcxx::Module& mod) { wrap_unary_ops(mod); wrap_binary_ops(mod); wrap_unary_reds(mod); + wrap_linalg_ops(mod); using jlcxx::ParameterList; using jlcxx::Parametric; diff --git a/scripts/build_cpp_wrapper.sh b/scripts/build_cpp_wrapper.sh index aae6f432..3a1cbe2c 100755 --- a/scripts/build_cpp_wrapper.sh +++ b/scripts/build_cpp_wrapper.sh @@ -42,20 +42,26 @@ fi echo $LEGATE_ROOT_DIR -# Default to OFF (CUDA support enabled), but allow override via environment variable -NO_CUDA=${NO_CUDA:-OFF} +LEGATE_WRAPPER_ENABLE_CUDA=${LEGATE_WRAPPER_ENABLE_CUDA:-ON} +CUDA_TOOLKIT_ROOT=${CUDA_TOOLKIT_ROOT:-} + +CUDA_ARGS=("-DLEGATE_WRAPPER_ENABLE_CUDA=${LEGATE_WRAPPER_ENABLE_CUDA}") +if [[ -n "$CUDA_TOOLKIT_ROOT" ]]; then + CUDA_ARGS+=("-DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT}") + CUDA_ARGS+=("-DCMAKE_LIBRARY_PATH=${CUDA_TOOLKIT_ROOT}/lib/stubs") +fi if [[ ! -f "$BUILD_DIR/CMakeCache.txt" ]]; then echo "Configuring project..." 
cmake -S "$CUNUMERIC_WRAPPER_SOURCE" -B "$BUILD_DIR" \ -D BINARYBUILDER=OFF \ - -D NOCUDA=$NO_CUDA \ -D CMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ -D CMAKE_PREFIX_PATH="$CUPYNUMERIC_ROOT_DIR;$LEGATE_ROOT_DIR;" \ -D CUPYNUMERIC_PATH="$CUPYNUMERIC_ROOT_DIR" \ -D BLAS_LIBRARIES="$BLAS_LIB_DIR/libopenblas.so" \ -D PROJECT_INSTALL_PATH="$INSTALL_DIR" \ - -D CMAKE_BUILD_TYPE=Releases + -D CMAKE_BUILD_TYPE=Release \ + "${CUDA_ARGS[@]}" else echo "Skipping configure (already done in $BUILD_DIR)" fi diff --git a/src/cuNumeric.jl b/src/cuNumeric.jl index 3a9280e6..0da59605 100644 --- a/src/cuNumeric.jl +++ b/src/cuNumeric.jl @@ -55,12 +55,16 @@ const DEFAULT_FLOAT = Float32 const DEFAULT_INT = Int32 const SUPPORTED_INT_TYPES = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64} -const SUPPORTED_FLOAT_TYPES = Union{Float32,Float64} # Float16 not supported yet +const SUPPORTED_FLOAT_TYPES = Union{Float32,Float64} # Float16 disabled for now. Issues need to be resolved. const SUPPORTED_COMPLEX_TYPES = Union{ComplexF32,ComplexF64} const SUPPORTED_NUMERIC_TYPES = Union{ SUPPORTED_INT_TYPES,SUPPORTED_FLOAT_TYPES,SUPPORTED_COMPLEX_TYPES } + +# solve has no integer backend kernel +const SUPPORTED_SOLVE_TYPES = Union{SUPPORTED_FLOAT_TYPES,SUPPORTED_COMPLEX_TYPES} + const SUPPORTED_ARRAY_TYPES = Union{Bool,SUPPORTED_NUMERIC_TYPES} const SUPPORTED_TYPES = Union{SUPPORTED_ARRAY_TYPES,String} @@ -144,6 +148,7 @@ include("ndarray/broadcast.jl") include("ndarray/ndarray.jl") include("ndarray/unary.jl") include("ndarray/binary.jl") +include("ndarray/linalg.jl") # scoping macro include("scoping.jl") @@ -230,7 +235,7 @@ function __init__() _is_precompiling() && return nothing # Cannot set LEGATE_CONFIG on CI machines used - # to register packages. So we will just skip starting + # to register packages. So we will just skip starting # legate/cunumeric when using registry CI machines. 
get(ENV, "JULIA_REGISTRYCI_AUTOMERGE", false) == "true" && return nothing diff --git a/src/memory.jl b/src/memory.jl index ca28d07f..2bf447dc 100644 --- a/src/memory.jl +++ b/src/memory.jl @@ -17,6 +17,12 @@ const soft_frac = Ref{Float64}(0.80) const hard_frac = Ref{Float64}(0.90) const AUTO_GC_ENABLE = Ref{Bool}(false) +# memory measured right after the last GC +const post_gc_device_bytes = Atomic{Int64}(0) +const post_gc_host_bytes = Atomic{Int64}(0) +# how much new memory must accumulate before GC fires again +const gc_hysteresis_frac = Ref{Float64}(0.05) + @doc""" init_gc!() @@ -90,15 +96,31 @@ end function maybe_collect() host_bytes = current_host_bytes[] device_bytes = current_device_bytes[] - if host_bytes > hard_limit() || device_bytes > hard_limit(; host=false) - # Aggressive - GC.gc(true) - recalibrate_allocator!() - elseif host_bytes > soft_limit() || device_bytes > soft_limit(; host=false) - # Gentle - GC.gc(false) - recalibrate_allocator!() + + # minimum growth above the post-GC floor needed to re-collect + dev_floor = post_gc_device_bytes[] + host_floor = post_gc_host_bytes[] + dev_delta = Int(round(gc_hysteresis_frac[] * total_device_bytes[])) + host_delta = Int(round(gc_hysteresis_frac[] * total_host_bytes[])) + grew = device_bytes > dev_floor + dev_delta || host_bytes > host_floor + host_delta + + if device_bytes > hard_limit(; host=false) || host_bytes > hard_limit() + grew && _collect!(true) + elseif device_bytes > soft_limit(; host=false) || host_bytes > soft_limit() + grew && _collect!(false) + else + # reset floors so the next spike is caught immediately + atomic_xchg!(post_gc_device_bytes, 0) + atomic_xchg!(post_gc_host_bytes, 0) end return nothing end + +function _collect!(full::Bool) + GC.gc(full) + recalibrate_allocator!() + atomic_xchg!(post_gc_device_bytes, current_device_bytes[]) + atomic_xchg!(post_gc_host_bytes, current_host_bytes[]) + return nothing +end diff --git a/src/ndarray/detail/ndarray.jl b/src/ndarray/detail/ndarray.jl index 
b11c4101..9d630fcc 100644 --- a/src/ndarray/detail/ndarray.jl +++ b/src/ndarray/detail/ndarray.jl @@ -62,7 +62,7 @@ mutable struct NDArray{T,N,PADDED,P} <: AbstractNDArray{T,N} return handle end end -# this here is to avoid if else patterns +# this here is to avoid if else patterns @inline _NDArray(ptr, T, v, ::Nothing) = NDArray(ptr, T, v) @inline _NDArray(ptr, T, v, parent) = NDArray(ptr, T, v, parent) @@ -219,6 +219,16 @@ function nda_unary_reduction(out::NDArray, op_code::UnaryRedCode, input::NDArray return out end +function nda_unary_reduction_axes( + op_code::UnaryRedCode, input::NDArray{T,N}, axes::Vector{Int32}, keepdims::Bool +) where {T,N} + axes_c = collect(Int32, axes) + ptr = ccall((:nda_unary_reduction_axes, libnda), + NDArray_t, (UnaryRedCode, NDArray_t, Ptr{Int32}, Int32, Cint), + op_code, input.ptr, axes_c, Int32(length(axes_c)), keepdims) + return NDArray(ptr) +end + function nda_array_equal(rhs1::NDArray{T,N}, rhs2::NDArray{T,N}) where {T,N} ptr = ccall((:nda_array_equal, libnda), NDArray_t, (NDArray_t, NDArray_t), @@ -226,18 +236,21 @@ function nda_array_equal(rhs1::NDArray{T,N}, rhs2::NDArray{T,N}) where {T,N} return NDArray(ptr, Bool, Val(1)) end -function nda_diag(arr::NDArray, k::Int32) +# 2D -> 1D: extract the k-th diagonal. Backend only supports the 2D case +# (1D-construct and >2D both abort), so non-2D input is a MethodError. 
+function nda_diag(arr::NDArray{T,2}, k::Int32) where {T} ptr = ccall((:nda_diag, libnda), NDArray_t, (NDArray_t, Int32), arr.ptr, k) - return NDArray(ptr) + return NDArray(ptr, T, Val(1)) end -function nda_unique(arr::NDArray) +# unique always returns a flat 1D array of the input's element type +function nda_unique(arr::NDArray{T}) where {T} ptr = ccall((:nda_unique, libnda), NDArray_t, (NDArray_t,), arr.ptr) - return NDArray(ptr) + return NDArray(ptr, T, Val(1)) end function nda_ravel(arr::NDArray) @@ -305,11 +318,12 @@ function nda_trace( return NDArray(ptr, T, Val(1)) end -function nda_transpose(arr::NDArray) +# transpose reverses the axes: element type and rank are preserved +function nda_transpose(arr::NDArray{T,N}) where {T,N} ptr = ccall((:nda_transpose, libnda), NDArray_t, (NDArray_t,), arr.ptr) - return NDArray(ptr) + return NDArray(ptr, T, Val(N)) end function nda_attach_external(arr::AbstractArray{T,N}) where {T,N} @@ -328,11 +342,8 @@ function get_store(arr::NDArray) end function get_ptr(arr::NDArray{T,N}) where {T,N} - # Get the raw Legate array impl - st_handle = get_store(arr) # CxxPtr{LogicalArrayImpl} - # Wrap it in the high-level LogicalArray struct expected by Legate.get_ptr - # st_handle[] dereferences the CxxPtr to get the LogicalArrayImpl object - la = Legate.LogicalArray{T,N}(st_handle[], size(arr)) + st_handle = get_store(arr) # LogicalArrayImplAllocated (returned by value) + la = Legate.LogicalArray{T,N}(st_handle, size(arr)) return Legate.get_ptr(la) end @@ -494,3 +505,9 @@ function compare(arr::NDArray{T,N}, arr2::NDArray{T,N}, atol::Real, rtol::Real) # successful completion return true end + +function nda_to_logical_store(arr::NDArray{T,N}) where {T,N} + la_handle = cuNumeric.get_store(arr) # LogicalArrayImplAllocated (returned by value) + st_handle = Legate.data(Legate.LogicalArray{T,N}(la_handle, size(arr))) + return Legate.LogicalStore{T,N}(st_handle, size(arr)) +end diff --git a/src/ndarray/linalg.jl b/src/ndarray/linalg.jl new file 
mode 100644 index 00000000..1c01b44a --- /dev/null +++ b/src/ndarray/linalg.jl @@ -0,0 +1,119 @@ +function choose_nd_color_shape(shape::NTuple{N,Int}) where {N} + color_shape = Base.ones(Int, N) + if N > 2 + color_shape[1] = Legate.num_procs() + done = false + while !done && color_shape[1] % 2 == 0 + weight_per_dim = [shape[i] / color_shape[i] for i in 1:(N - 2)] + max_weight, idx = findmax(weight_per_dim) + if weight_per_dim[idx] > 2 * weight_per_dim[1] + color_shape[1] ÷= 2 + color_shape[idx] *= 2 + else + done = true + end + end + end + return Tuple(color_shape) +end + +function prepare_manual_task_for_batched_matrices(full_shape::NTuple{N,Int}) where {N} + initial_color_shape = choose_nd_color_shape(full_shape) + tilesize = Tuple( + (full_shape[i] + initial_color_shape[i] - 1) ÷ initial_color_shape[i] for i in 1:N + ) + color_shape = Tuple((full_shape[i] + tilesize[i] - 1) ÷ tilesize[i] for i in 1:N) + return tilesize, color_shape +end + +function solve_batched(a::NDArray{T,N}, b::NDArray, x::NDArray) where {T,N} + nrhs = size(b)[end] + full_shape = size(a) + tilesize_a, color_shape = prepare_manual_task_for_batched_matrices(full_shape) + tilesize_b = (tilesize_a[1:(end - 1)]..., nrhs) + + store_a = nda_to_logical_store(a) + store_b = nda_to_logical_store(b) + store_x = nda_to_logical_store(x) + + tiled_a = Legate.partition_by_tiling(store_a, collect(tilesize_a)) + tiled_b = Legate.partition_by_tiling(store_b, collect(tilesize_b)) + tiled_x = Legate.partition_by_tiling(store_x, collect(tilesize_b)) + + rt = Legate.get_runtime() + domain = Legate.domain_from_shape(Legate.Shape(Legate.to_cxx_vector(color_shape))) + lib = cuNumeric.get_lib() + task = Legate.create_manual_task(rt, lib, cuNumeric.SOLVE, domain) + + Legate.add_input(task, tiled_a) + Legate.add_input(task, tiled_b) + Legate.add_output(task, tiled_x) + + Legate.submit_manual_task(rt, task) +end + +# solve runs in floating point: +# int/bool inputs promote to Float64 (matching cupynumeric) +const 
_SOLVE_PROMOTABLE = Union{SUPPORTED_INT_TYPES,Bool} +const _SOLVE_ACCEPTED = Union{SUPPORTED_SOLVE_TYPES,_SOLVE_PROMOTABLE} +_solve_eltype(::Type{T}) where {T<:_SOLVE_PROMOTABLE} = Float64 +_solve_eltype(::Type{T}) where {T<:SUPPORTED_SOLVE_TYPES} = T + +# Type/dim guards dispatch on one argument at a time, then forward to `_solve`. +function solve(a::NDArray{<:_SOLVE_ACCEPTED}, b::NDArray{<:_SOLVE_ACCEPTED}) + A, B = eltype(a), eltype(b) + O = promote_type(_solve_eltype(A), _solve_eltype(B)) + # int/bool -> float is an implicit promotion, disallowed unless `allowpromotion` + A <: _SOLVE_PROMOTABLE && assertpromotion(solve, A, O) + B <: _SOLVE_PROMOTABLE && assertpromotion(solve, B, O) + return _solve_check_a_dims(unchecked_promote_arr(a, O), unchecked_promote_arr(b, O)) +end + +function solve(a::NDArray, b::NDArray) + bad = eltype(a) <: _SOLVE_ACCEPTED ? eltype(b) : eltype(a) + throw(ArgumentError("array type $bad is unsupported in solve")) +end + +# `a` must be at least 2D, `b` at least 1D. +function _solve_check_a_dims(a::NDArray{<:Any,0}, b::NDArray) + throw(ArgumentError("0-dimensional array given. Array must be at least two-dimensional")) +end +function _solve_check_a_dims(a::NDArray{<:Any,1}, b::NDArray) + throw(ArgumentError("1-dimensional array given. Array must be at least two-dimensional")) +end +_solve_check_a_dims(a::NDArray, b::NDArray) = _solve_check_b_dims(a, b) + +function _solve_check_b_dims(a::NDArray, b::NDArray{<:Any,0}) + throw(ArgumentError("0-dimensional array given. Array must be at least one-dimensional")) +end +_solve_check_b_dims(a::NDArray, b::NDArray) = _solve(a, b) + +# 2D case: (m,m),(m)->(m). +# Backend needs rhs "b" to be 2D. 
We reshape b from (n,) to (n,1) +function _solve(a::NDArray{T,2}, b::NDArray{S,1}) where {T,S} + m = size(b)[1] + return reshape(_solve(a, reshape(b, (m, 1))), (m,)) +end + +# 2D (m,m),(m,n)->(m,n) and batched (...,m,m),(...,m,n)->(...,m,n) +function _solve(a::NDArray{T,N}, b::NDArray{S,N}) where {T,S,N} + size(a)[end - 1] != size(a)[end] && + throw(ArgumentError("Last 2 dimensions of the array must be square")) + size(a)[end] != size(b)[end - 1] && + throw( + ArgumentError( + "Input operand 1 has a mismatch in its dimension " * + "$(N-2), with signature (...,m,m),(...,m,n)->(...,m,n)" * + " (size $(size(b)[end-1]) is different from $(size(a)[end]))", + ), + ) + (prod(size(a)) == 0 || prod(size(b)) == 0) && return zeros(T, size(b)...) + x = zeros(T, size(b)...) + solve_batched(a, b, x) + return x +end + +# Mismatched batch dimensions +function _solve(a::NDArray{T,N}, b::NDArray{S,M}) where {T,N,S,M} + throw(ArgumentError("Batched matrices require signature (...,m,m),(...,m,n)->(...,m,n)")) +end diff --git a/src/ndarray/ndarray.jl b/src/ndarray/ndarray.jl index d58f6572..086a60ca 100644 --- a/src/ndarray/ndarray.jl +++ b/src/ndarray/ndarray.jl @@ -30,21 +30,27 @@ function transpose(arr::NDArray) end @doc""" - cuNumeric.eye(rows::Int; T=Float32) + cuNumeric.eye([T,] rows::Int) -Create a 2D identity `NDArray` of size `rows x rows` with element type `T`. +Create a 2D identity `NDArray` of size `rows x rows` with element type `T` +(defaults to `DEFAULT_FLOAT`). """ -function eye(rows::Int; T::Type{S}=Float64) where {S} - return nda_eye(Int32(rows), S) +function eye(::Type{T}, rows::Int) where {T} + return nda_eye(Int32(rows), T) +end +function eye(rows::Int) + return eye(DEFAULT_FLOAT, rows) end @doc""" - cuNumeric.trace(arr::NDArray; offset=0, a1=0, a2=1, T=Float32) + cuNumeric.trace(arr::NDArray; offset=0, a1=0, a2=1) -Compute the trace of the `NDArray` along the specified axes. +Compute the trace (sum of a diagonal) of the `NDArray`.
+The accumulator type follows promotions of other reductions like 'sum'. """ -function trace(arr::NDArray; offset::Int=0, a1::Int=0, a2::Int=1, T::Type{S}=Float32) where {S} - return nda_trace(arr, Int32(offset), Int32(a1), Int32(a2), S) +function trace(arr::NDArray{T}; offset::Int=0, a1::Int=0, a2::Int=1) where {T} + T_OUT = Base.promote_op(Base.sum, Vector{T}) + return nda_trace(arr, Int32(offset), Int32(a1), Int32(a2), T_OUT) end @doc""" diff --git a/src/ndarray/unary.jl b/src/ndarray/unary.jl index 6cc7b749..96e69f98 100644 --- a/src/ndarray/unary.jl +++ b/src/ndarray/unary.jl @@ -209,9 +209,11 @@ The following unary reduction operations are supported and can be applied direct • `prod` • `sum` - These operations follow standard Julia semantics. +Reduction over specific dimensions is supported via the `dims` keyword argument, +following the same semantics as Julia's base reduction functions. + Examples -------- @@ -220,6 +222,14 @@ A = cuNumeric.ones(5) maximum(A) sum(A) + +# Reduce over a specific dimension +B = cuNumeric.ones(3, 4) +sum(B, dims=1) # 1×4 result +sum(B, dims=2) # 3×1 result + +# Reducing over multiple dimensions at once is not yet supported: +# sum(B, dims=(1,2)) throws an error ``` """ global const unary_reduction_map = Dict{Function,UnaryRedCode}( @@ -242,26 +252,57 @@ global const unary_reduction_map = Dict{Function,UnaryRedCode}( #!
IT WOULD BE NICE IF THESE JUST RETURNED SCALARS WHEN APPROPRIATE # #*TODO HOW TO GET THESE ACTING ON CERTAIN DIMS + +function _unary_reduction_impl(base_func, op_code, input::NDArray{T}, ::Colon) where {T} + T_OUT = Base.promote_op(base_func, Vector{T}) + is_wider_type(T_OUT, T) && assertpromotion(base_func, T, T_OUT) + out = cuNumeric.zeros(T_OUT) + return nda_unary_reduction(out, op_code, unchecked_promote_arr(input, T_OUT)) +end + +function _unary_reduction_impl(base_func, op_code, input::NDArray{T,N}, dims::Integer) where {T,N} + T_OUT = Base.promote_op(base_func, Vector{T}) + is_wider_type(T_OUT, T) && assertpromotion(base_func, T, T_OUT) + axes = Int32[dims - 1] + return nda_unary_reduction_axes(op_code, unchecked_promote_arr(input, T_OUT), axes, true) +end + +function _unary_reduction_impl(base_func, op_code, input::NDArray{T,N}, dims::Tuple) where {T,N} + if length(dims) > 1 + error("$(base_func): reducing over multiple dimensions is not yet supported. Got dims=$dims") + end + # single element tuple + T_OUT = Base.promote_op(base_func, Vector{T}) + is_wider_type(T_OUT, T) && assertpromotion(base_func, T, T_OUT) + axes = Int32[dims[1] - 1] + return nda_unary_reduction_axes(op_code, unchecked_promote_arr(input, T_OUT), axes, true) +end + # Generate code for all unary reductions. 
for (base_func, op_code) in unary_reduction_map @eval begin - function $(Symbol(base_func))(input::NDArray{T}) where {T} - T_OUT = Base.promote_op($base_func, Vector{T}) - is_wider_type(T_OUT, T) && assertpromotion($base_func, T, T_OUT) - out = cuNumeric.zeros(T_OUT) #0D result (not right if reducing along dims) - return nda_unary_reduction(out, $(op_code), unchecked_promote_arr(input, T_OUT)) + function $(Symbol(base_func))(input::NDArray{T,N}; dims=Colon()) where {T,N} + return _unary_reduction_impl($base_func, $(op_code), input, dims) end end end -function Base.all(input::NDArray{Bool}) +function _bool_reduction_impl(op_code, input::NDArray{Bool}, ::Colon) out = cuNumeric.zeros(Bool) - return nda_unary_reduction(out, cuNumeric.ALL, input) + return nda_unary_reduction(out, op_code, input) end -function Base.any(input::NDArray{Bool}) - out = cuNumeric.zeros(Bool) - return nda_unary_reduction(out, cuNumeric.ANY, input) +function _bool_reduction_impl(op_code, input::NDArray{Bool}, dims) + axes = collect(Int32, (d - 1 for d in (dims isa Integer ? (dims,) : dims))) + return nda_unary_reduction_axes(op_code, input, axes, true) +end + +function Base.all(input::NDArray{Bool}; dims=Colon()) + return _bool_reduction_impl(cuNumeric.ALL, input, dims) +end + +function Base.any(input::NDArray{Bool}; dims=Colon()) + return _bool_reduction_impl(cuNumeric.ANY, input, dims) end #! 
ONLY ADD ONCE REDUCTIONS RETURN A SCALAR diff --git a/src/utilities/preference.jl b/src/utilities/preference.jl index bb2effb9..1b2852d2 100644 --- a/src/utilities/preference.jl +++ b/src/utilities/preference.jl @@ -140,5 +140,10 @@ function find_dependency_paths(::Type{CNPreferences.JLL}) return results end -find_dependency_paths(::Type{CNPreferences.Developer}) = Dict{String,String}() +function find_dependency_paths(::Type{CNPreferences.Developer}) + isdefined(@__MODULE__, :cupynumeric_jll) || return Dict{String,String}() + paths = getfield(@__MODULE__, :cupynumeric_jll).LIBPATH_list + return Dict(name => dirname(Libdl.find_library(lib, paths)) for (name, lib) in DEPS_MAP) +end + find_dependency_paths(::Type{CNPreferences.Conda}) = Dict{String,String}() diff --git a/src/utilities/version.jl b/src/utilities/version.jl index b2cf79fa..5dabd527 100644 --- a/src/utilities/version.jl +++ b/src/utilities/version.jl @@ -15,8 +15,13 @@ function get_cxx_version(libpath::AbstractString) end function read_githash() - githash_path = joinpath(@__DIR__, "../", "../", ".githash") - return isfile(githash_path) ? 
readchomp(githash_path) : "unknown" + try + pkg_root = joinpath(@__DIR__, "..", "..") + hash = readchomp(`git -C $pkg_root rev-parse HEAD`) + isempty(hash) || return hash + catch + end + return "unknown" end @doc""" diff --git a/test/Project.toml b/test/Project.toml index 7dd0521e..8e41dd68 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +CNPreferences = "3e078157-ea10-49d5-bf32-908f777cd46f" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CUDA_Driver_jll = "4ee394cb-3365-5eb0-8335-949819d2adfc" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/runtests.jl b/test/runtests.jl index d2396b3f..55bb4af1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -167,6 +167,37 @@ end end end +@testset verbose=true "Unary Reductions with Dims" begin + N = 100 + + @testset for T in Base.uniontypes(cuNumeric.SUPPORTED_ARRAY_TYPES) + julia_arr_1D = my_rand(T, N) + julia_arr_2D = my_rand(T, isqrt(N), isqrt(N)) + + cunumeric_arr_1D = @allowscalar NDArray(julia_arr_1D) + cunumeric_arr_2D = @allowscalar NDArray(julia_arr_2D) + + @testset "$(func)" for (func, _) in cuNumeric.unary_reduction_map + # Skip reductions not supported by the cuNumeric backend for complex types + if T <: Complex && ( + func == Base.maximum || + func == Base.minimum || + func == Base.prod + ) + continue + end + + ## TODO Int8 min/max along an axis is broken on GPU + if cuNumeric.HAS_CUDA && T == Int8 && (func == Base.minimum || func == Base.maximum) + continue + end + + test_unary_reduction_dims(func, julia_arr_1D, cunumeric_arr_1D) + test_unary_reduction_dims(func, julia_arr_2D, cunumeric_arr_2D) + end + end +end + @testset verbose = true "Binary Ops" begin N = 100 diff --git a/test/tests/linalg.jl b/test/tests/linalg.jl index 32a18100..8d5ededb 
100644 --- a/test/tests/linalg.jl +++ b/test/tests/linalg.jl @@ -19,22 +19,24 @@ =# @testset "transpose" begin - A = rand(Float64, 4, 3) - nda = cuNumeric.NDArray(A) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + A = my_rand(T, 4, 3) + nda = cuNumeric.NDArray(A) - ref = transpose(A) - out = cuNumeric.transpose(nda) + ref = transpose(A) + out = cuNumeric.transpose(nda) - allowscalar() do - @test cuNumeric.compare(ref, out, atol(Float64), rtol(Float64)) + allowscalar() do + @test cuNumeric.compare(ref, out, atol(T), rtol(T)) + end end end @testset "eye" begin - for T in (Float32, Float64, Int32) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) n = 5 ref = Matrix{T}(I, n, n) - out = cuNumeric.eye(n; T=T) + out = cuNumeric.eye(T, n) allowscalar() do @test cuNumeric.compare(ref, out, atol(T), rtol(T)) end @@ -42,41 +44,45 @@ end end @testset "trace" begin - A = rand(Float64, 6, 6) - nda = cuNumeric.NDArray(A) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + A = my_rand(T, 6, 6) + nda = cuNumeric.NDArray(A) - ref = tr(A) - out = cuNumeric.trace(nda) - - allowscalar() do - @test ref ≈ out[1] atol=atol(Float32) rtol=rtol(Float32) + ref = sum(diag(A)) # widens ints like trace's accumulator + out = cuNumeric.trace(nda) + allowscalar() do + @test ref ≈ out[1] atol=atol(eltype(ref)) rtol=rtol(eltype(ref)) + end end end @testset "trace with offset" begin - A = rand(Float32, 5, 5) - nda = cuNumeric.NDArray(A) - - for k in (-2, -1, 0, 1, 2) - ref = sum(diag(A, k)) - out = cuNumeric.trace(nda; offset=k) - - allowscalar() do - @test ref ≈ out[1] atol=atol(Float32) rtol=rtol(Float32) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + A = my_rand(T, 5, 5) + nda = cuNumeric.NDArray(A) + + @testset "offset=$(k)" for k in (-2, -1, 0, 1, 2) + ref = sum(diag(A, k)) + out = cuNumeric.trace(nda; offset=k) + allowscalar() do + @test ref ≈ out[1] 
atol=atol(eltype(ref)) rtol=rtol(eltype(ref)) + end end end end @testset "diag" begin - A = rand(Int, 6, 6) - nda = cuNumeric.NDArray(A) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + A = my_rand(T, 6, 6) + nda = cuNumeric.NDArray(A) - for k in (-2, 0, 3) - ref = diag(A, k) - out = cuNumeric.diag(nda; k=k) + @testset "k=$(k)" for k in (-2, 0, 3) + ref = diag(A, k) + out = cuNumeric.diag(nda; k=k) - allowscalar() do - @test cuNumeric.compare(ref, out, atol(Int32), rtol(Int32)) + allowscalar() do + @test cuNumeric.compare(ref, out, atol(T), rtol(T)) + end end end end @@ -94,11 +100,88 @@ end # end @testset "unique" begin - A = [1, 2, 2, 3, 4, 4, 4, 5] - nda = cuNumeric.NDArray(A) + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + A = T[1, 2, 2, 3, 4, 4, 4, 5] + nda = cuNumeric.NDArray(A) + + ref = unique(A) + out = cuNumeric.unique(nda) + + @test Set(Array(out)) == Set(ref) + end +end + +@testset "solve diagonal" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SOLVE_TYPES) + n = 4 + A = cuNumeric.zeros(T, n, n) + b = cuNumeric.zeros(T, n, 1) + cuNumeric.@allowscalar for i in 1:n + A[i, i] = T(4) + b[i, 1] = T(1) + end + x = cuNumeric.solve(A, b) + allowscalar() do + @test cuNumeric.compare(fill(T(0.25), n, 1), x, atol(T), rtol(T)) + end + end +end + +@testset "solve identity" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SOLVE_TYPES) + n = 4 + A = cuNumeric.NDArray(Matrix{T}(I, n, n)) + b = cuNumeric.NDArray(reshape(T.(collect(1:n)), n, 1)) + x = cuNumeric.solve(A, b) + ref = reshape(T.(collect(1:n)), n, 1) + allowscalar() do + @test cuNumeric.compare(ref, x, atol(T), rtol(T)) + end + end +end + +@testset "solve general" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SOLVE_TYPES) + A_ref = T[2 1; 5 7] + b_ref = T[11; 13;;] # creates a 2d matrix instead of vector + A = cuNumeric.NDArray(A_ref) + b = 
cuNumeric.NDArray(b_ref) + x = cuNumeric.solve(A, b) + ref = A_ref \ b_ref + allowscalar() do + @test cuNumeric.compare(ref, x, atol(T), rtol(T)) + end + end +end - ref = unique(A) - out = cuNumeric.unique(nda) +@testset "solve vector rhs" begin + @testset verbose=true for T in Base.uniontypes(cuNumeric.SUPPORTED_SOLVE_TYPES) + A_ref = T[2 1; 5 7] + b_ref = T[11, 13] + x = cuNumeric.solve(cuNumeric.NDArray(A_ref), cuNumeric.NDArray(b_ref)) + @test ndims(x) == 1 + ref = A_ref \ b_ref + allowscalar() do + @test cuNumeric.compare(ref, x, atol(T), rtol(T)) + end + end +end - @test sort(Array(out)) == sort(ref) +@testset "solve promotion" begin + @testset verbose=true for T in (Int32, Int64, Bool) + A = cuNumeric.NDArray(T[1 0; 0 1]) + b = cuNumeric.NDArray(reshape(T[1, 1], 2, 1)) + + # int/bool requires promotion to float. Will throw without allowpromotion() + @test_throws "Implicit promotion" cuNumeric.solve(A, b) + + # ...allowed under @allowpromotion, result is Float64 + allowpromotion() do + x = cuNumeric.solve(A, b) + ref = Float64[1 0; 0 1] \ Float64[1; 1;;] + allowscalar() do + @test cuNumeric.compare(ref, x, atol(Float64), rtol(Float64)) + end + end + end end diff --git a/test/tests/stability.jl b/test/tests/stability.jl index 648f4bc4..8eaa15a2 100644 --- a/test/tests/stability.jl +++ b/test/tests/stability.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -98,3 +98,36 @@ end @test @inferred(a ./ b) !== nothing @test @inferred(((a .* b) .+ a) .* 2.0f0) !== nothing end + +@testset verbose = true "solve" begin + # native float/complex, 2D and 1D rhs + @testset "$(T)" for T in Base.uniontypes(cuNumeric.SUPPORTED_SOLVE_TYPES) + A = cuNumeric.NDArray(T[2 1; 5 7]) + b2 = cuNumeric.NDArray(T[11; 13;;]) # creates a 2d matrix instead of vector + b1 = cuNumeric.NDArray(T[11, 13]) + @test
@inferred(cuNumeric.solve(A, b2)) !== nothing + @test @inferred(cuNumeric.solve(A, b1)) !== nothing + end + + # int/bool promote to Float64 (under allowpromotion) and stay inferrable + @testset "promote $(T)" for T in (Int32, Int64, Bool) + A = cuNumeric.NDArray(T[1 0; 0 1]) + b = cuNumeric.NDArray(reshape(T[1, 1], 2, 1)) + allowpromotion() do + @test @inferred(cuNumeric.solve(A, b)) !== nothing + end + end +end + +@testset verbose = true "linalg ops" begin + @testset "$(T)" for T in Base.uniontypes(cuNumeric.SUPPORTED_NUMERIC_TYPES) + M = cuNumeric.zeros(T, 4, 3) + sq = cuNumeric.zeros(T, 5, 5) + v = cuNumeric.zeros(T, 8) + @test @inferred(cuNumeric.eye(T, 5)) !== nothing + @test @inferred(cuNumeric.transpose(M)) !== nothing + @test @inferred(cuNumeric.trace(sq)) !== nothing + @test @inferred(cuNumeric.diag(sq)) !== nothing + @test @inferred(cuNumeric.unique(v)) !== nothing + end +end diff --git a/test/tests/unary_tests.jl b/test/tests/unary_tests.jl index 88f284ff..8c1e4f52 100644 --- a/test/tests/unary_tests.jl +++ b/test/tests/unary_tests.jl @@ -1,4 +1,4 @@ -#= Copyright 2026 Northwestern University, +#= Copyright 2026 Northwestern University, * Carnegie Mellon University University * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -88,3 +88,23 @@ function test_unary_function_set(func_dict, T, N) test_unary_operation(func, julia_arr_2D, cunumeric_arr_2D, T) end end + +function test_unary_reduction_dims( + func, julia_arr::AbstractArray{T,N}, cunumeric_arr::NDArray{T,N} +) where {T,N} + allowpromotion(true) do + for d in 1:N + julia_res = func(julia_arr; dims=d) + cunumeric_res = func(cunumeric_arr; dims=d) + allowscalar() do + @test cuNumeric.compare(julia_res, cunumeric_res, atol(T), rtol(T)) + end + end + + # we are testing a multi axis reduction. This will throw a runtime error. 
+ # https://github.com/nv-legate/cupynumeric/blob/main/src/cupynumeric/ndarray.cc#L1132 + if N >= 2 + @test_throws Exception func(cunumeric_arr, dims=(1, 2)) + end + end +end