Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Debug Lux test failure on GPU #736

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ steps:
version:
- "1.10"
group:
- core
# - core
- neural_networks
- integration
# - integration
plugins:
- JuliaCI/julia#v1:
version: "{{matrix.version}}"
Expand Down
165 changes: 0 additions & 165 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -1,165 +0,0 @@
name: CI

on:
pull_request:
paths:
- '.github/workflows/CI.yml'
- 'deps/**'
- 'ext/**'
- 'lib/**'
- 'src/**'
- 'test/**'
- 'Project.toml'
push:
branches:
- main
- release-*
tags: '*'
paths:
- '.github/workflows/CI.yml'
- 'deps/**'
- 'ext/**'
- 'lib/**'
- 'src/**'
- 'test/**'
- 'Project.toml'

concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
test:
name: Julia ${{ matrix.version }} - ${{ matrix.test_group }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.libReactant }} libReactant - assertions=${{ matrix.assertions }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
version:
- '1.10'
- '1.11'
# - 'nightly'
os:
- ubuntu-24.04
# `ubuntu-22.04-arm` is considered more stable than `ubuntu-24.04-arm`:
# <https://github.com/orgs/community/discussions/148648#discussioncomment-12099554>.
- ubuntu-22.04-arm
- macOS-latest
test_group:
- core
- neural_networks
- integration
arch:
- x64
- aarch64
assertions:
- false
libReactant: [packaged]
include:
- os: ubuntu-24.04
arch: x64
libReactant: packaged
version: '1.10'
assertions: true
test_group: core
- os: ubuntu-24.04
arch: x64
libReactant: packaged
version: '1.10'
assertions: true
test_group: neural_networks
- os: ubuntu-24.04
arch: x64
libReactant: packaged
version: '1.10'
assertions: true
test_group: integration
# - os: ubuntu-24.04
# arch: x86
# libReactant: packaged
# version: '1.10'
# test_group: core
# - os: ubuntu-24.04
# arch: x86
# libReactant: packaged
# version: '1.10'
# test_group: neural_networks
# - os: ubuntu-24.04
# arch: x86
# libReactant: packaged
# version: '1.10'
# test_group: integration
exclude:
- os: ubuntu-24.04 # this is x86_64, exclude foreign architecture
arch: aarch64
- os: ubuntu-22.04-arm # this is aarch64, exclude foreign architecture
arch: x64
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
if: ${{ ! matrix.assertions }}
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- uses: julia-actions/cache@v2
- uses: actions/checkout@v4
if: ${{ matrix.assertions }}
with:
repository: 'JuliaLang/julia'
ref: release-${{ matrix.version }}
path: 'julia'
- name: Compile Julia
if: ${{ matrix.assertions }}
run: |
sed -i.bak 's/exit 2/exit 0/g' julia/deps/tools/jlchecksum
make -C julia -j $(nproc) FORCE_ASSERTIONS=1 LLVM_ASSERTIONS=1 JULIA_PRECOMPILE=0
echo $PWD/julia/usr/bin >> $GITHUB_PATH
- name: Build libReactant
if: ${{ matrix.libReactant == 'local' && matrix.os != 'macOS-latest'}}
id: build_libreactant
run: |
python -m pip install numpy
julia --color=yes --project=deps -e 'using Pkg; Pkg.instantiate()'
julia --color=yes --project=deps deps/build_local.jl
cp LocalPreferences.toml test/
- name: Build libReactant MacOS
if: ${{ matrix.libReactant == 'local' && matrix.os == 'macOS-latest'}}
id: build_libreactant_mac
run: |
python -m pip install numpy
julia --color=yes --project=deps -e 'using Pkg; Pkg.instantiate()'
SDKROOT=`xcrun --show-sdk-path` julia --color=yes --project=deps deps/build_local.jl
cp LocalPreferences.toml test/
- name: "Install Dependencies"
run: |
import Pkg
Pkg.Registry.update()
# Install packages present in subdirectories
dev_pks = Pkg.PackageSpec[]
for path in ("lib/ReactantCore",)
push!(dev_pks, Pkg.PackageSpec(; path))
end
Pkg.develop(dev_pks)
shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0}
# Only in Julia v1.10 we need to install `ReactantCore` manually.
if: ${{ matrix.version == '1.10' }}
env:
JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
- name: "Run Tests"
run: |
import Pkg
Pkg.Registry.update()
Pkg.test(; coverage="user")
shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0}
id: run_tests
env:
JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
REACTANT_TEST_GROUP: ${{ matrix.test_group }}
- uses: julia-actions/julia-processcoverage@v1
if: steps.run_tests.outcome == 'success'
- uses: codecov/codecov-action@v5
if: steps.run_tests.outcome == 'success'
with:
files: lcov.info
70 changes: 0 additions & 70 deletions .github/workflows/downgrade.yml
Original file line number Diff line number Diff line change
@@ -1,70 +0,0 @@
name: Downgrade

on:
pull_request:
branches:
- main
paths:
- '.github/workflows/downgrade.yml'
- 'ext/**'
- 'lib/**'
- 'src/**'
- 'Project.toml'
push:
branches:
- main
paths:
- '.github/workflows/downgrade.yml'
- 'ext/**'
- 'lib/**'
- 'src/**'
- 'Project.toml'

concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
downgrade:
# if: ${{ !contains(github.event.head_commit.message, '[skip tests]') && github.base_ref == github.event.repository.default_branch }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
test_group:
- core
- neural_networks
- integration
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v2
with:
version: "1.10"
- uses: julia-actions/cache@v2
- uses: julia-actions/julia-downgrade-compat@v1
with:
skip: "ReactantCore"
- name: "Install Dependencies and Run Tests"
run: |
import Pkg
Pkg.Registry.update()
# Install packages present in subdirectories
dev_pks = Pkg.PackageSpec[]
for path in ("lib/ReactantCore",)
push!(dev_pks, Pkg.PackageSpec(; path))
end
Pkg.develop(dev_pks)
Pkg.test(; coverage="user")
shell: julia --color=yes --code-coverage=user --depwarn=yes --project=. {0}
id: run_tests
env:
JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
REACTANT_TEST_GROUP: ${{ matrix.test_group }}
- uses: julia-actions/julia-processcoverage@v1
if: steps.run_tests.outcome == 'success'
- uses: codecov/codecov-action@v5
if: steps.run_tests.outcome == 'success'
with:
files: lcov.info
25 changes: 23 additions & 2 deletions test/nn/lux.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,27 +42,48 @@ end
cst2 = Reactant.to_rarray(st)
cnoisy = Reactant.ConcreteRArray(noisy)

@info @__LINE__
f = Reactant.compile((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))
@info @__LINE__
f_mlir = Reactant.Compiler.compile_mlir((a, b, c, d) -> first(a(b, c, d)), (cmodel, cnoisy, cps, cst))[1]
@info @__LINE__
println(f_mlir)
@info @__LINE__

comp = f(cmodel, cnoisy, cps, cst)
@info @__LINE__

@test comp ≈ origout atol = 1e-3 rtol = 1e-2

target = onehotbatch(truth, [true, false]) # 2×1000 OneHotMatrix
@info @__LINE__

ctarget = Reactant.ConcreteRArray(Array{Float32}(target))
# ctarget = Reactant.to_rarray(target)
@info @__LINE__

res, dps = gradient_loss_function(model, noisy, target, ps, st)
@info @__LINE__

compiled_gradient = Reactant.compile(
gradient_loss_function, (cmodel, cnoisy, ctarget, cps, cst2)
)
@info @__LINE__
compiled_gradient_mlir = Reactant.Compiler.compile_mlir(
gradient_loss_function, (cmodel, cnoisy, ctarget, cps, cst2)
)[1]
@info @__LINE__

println(compiled_gradient_mlir)
@info @__LINE__

res_reactant, dps_reactant = compiled_gradient(cmodel, cnoisy, ctarget, cps, cst2)
@info @__LINE__
res_reactant_mlir, dps_reactant_mlir = @jit Reactant.Ops.hlo_call(repr(compiled_gradient_mlir), cmodel, cnoisy, ctarget, cps, cst2)
@info @__LINE__

@test res ≈ res_reactant atol = 1e-3 rtol = 1e-2
for (dps1, dps2) in zip(fleaves(dps), fleaves(dps_reactant))
@test res ≈ res_reactant_mlir atol = 1e-3 rtol = 1e-2
for (dps1, dps2) in zip(fleaves(dps), fleaves(dps_reactant_mlir))
@test dps1 ≈ dps2 atol = 1e-3 rtol = 1e-2
end
end
70 changes: 36 additions & 34 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using Reactant, SafeTestsets, Test

run(`nvidia-smi`)

# parse some command-line arguments
function extract_flag!(args, flag, default=nothing; typ=typeof(default))
for f in args
Expand Down Expand Up @@ -42,45 +44,45 @@ end
const REACTANT_TEST_GROUP = lowercase(get(ENV, "REACTANT_TEST_GROUP", "all"))

@testset "Reactant.jl Tests" begin
if REACTANT_TEST_GROUP == "all" || REACTANT_TEST_GROUP == "core"
@safetestset "Layout" include("layout.jl")
@safetestset "Tracing" include("tracing.jl")
@safetestset "Basic" include("basic.jl")
@safetestset "Autodiff" include("autodiff.jl")
@safetestset "Complex" include("complex.jl")
@safetestset "Broadcast" include("bcast.jl")
@safetestset "Struct" include("struct.jl")
@safetestset "Closure" include("closure.jl")
@safetestset "Compile" include("compile.jl")
@safetestset "Buffer Donation" include("buffer_donation.jl")
@safetestset "Shortcuts to MLIR ops" include("ops.jl")
@safetestset "Wrapped Arrays" include("wrapped_arrays.jl")
@safetestset "Control Flow" include("control_flow.jl")
@safetestset "Sorting" include("sorting.jl")
@safetestset "Indexing" include("indexing.jl")
if !Sys.isapple()
@safetestset "Custom Number Types" include("custom_number_types.jl")
end
@safetestset "Sharding" include("sharding.jl")
end
# if REACTANT_TEST_GROUP == "all" || REACTANT_TEST_GROUP == "core"
# @safetestset "Layout" include("layout.jl")
# @safetestset "Tracing" include("tracing.jl")
# @safetestset "Basic" include("basic.jl")
# @safetestset "Autodiff" include("autodiff.jl")
# @safetestset "Complex" include("complex.jl")
# @safetestset "Broadcast" include("bcast.jl")
# @safetestset "Struct" include("struct.jl")
# @safetestset "Closure" include("closure.jl")
# @safetestset "Compile" include("compile.jl")
# @safetestset "Buffer Donation" include("buffer_donation.jl")
# @safetestset "Shortcuts to MLIR ops" include("ops.jl")
# @safetestset "Wrapped Arrays" include("wrapped_arrays.jl")
# @safetestset "Control Flow" include("control_flow.jl")
# @safetestset "Sorting" include("sorting.jl")
# @safetestset "Indexing" include("indexing.jl")
# if !Sys.isapple()
# @safetestset "Custom Number Types" include("custom_number_types.jl")
# end
# @safetestset "Sharding" include("sharding.jl")
# end

if REACTANT_TEST_GROUP == "all" || REACTANT_TEST_GROUP == "integration"
@safetestset "CUDA" include("integration/cuda.jl")
@safetestset "KernelAbstractions" include("integration/kernelabstractions.jl")
@safetestset "Linear Algebra" include("integration/linear_algebra.jl")
@safetestset "OffsetArrays" include("integration/offsetarrays.jl")
@safetestset "AbstractFFTs" include("integration/fft.jl")
@safetestset "SpecialFunctions" include("integration/special_functions.jl")
@safetestset "Random" include("integration/random.jl")
@safetestset "Python" include("integration/python.jl")
end
# if REACTANT_TEST_GROUP == "all" || REACTANT_TEST_GROUP == "integration"
# @safetestset "CUDA" include("integration/cuda.jl")
# @safetestset "KernelAbstractions" include("integration/kernelabstractions.jl")
# @safetestset "Linear Algebra" include("integration/linear_algebra.jl")
# @safetestset "OffsetArrays" include("integration/offsetarrays.jl")
# @safetestset "AbstractFFTs" include("integration/fft.jl")
# @safetestset "SpecialFunctions" include("integration/special_functions.jl")
# @safetestset "Random" include("integration/random.jl")
# @safetestset "Python" include("integration/python.jl")
# end

if REACTANT_TEST_GROUP == "all" || REACTANT_TEST_GROUP == "neural_networks"
@testset "Neural Networks" begin
@safetestset "NNlib Primitives" include("nn/nnlib.jl")
@safetestset "Flux.jl Integration" include("nn/flux.jl")
# @safetestset "NNlib Primitives" include("nn/nnlib.jl")
# @safetestset "Flux.jl Integration" include("nn/flux.jl")
if Sys.islinux()
@safetestset "LuxLib Primitives" include("nn/luxlib.jl")
# @safetestset "LuxLib Primitives" include("nn/luxlib.jl")
@safetestset "Lux Integration" include("nn/lux.jl")
end
end
Expand Down
Loading