diff --git a/Project.toml b/Project.toml index 44ab1f2..1854dbd 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa" ExaTron = "28b18bf8-76f9-41ea-81fa-0f922810b349" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9" +JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/src/ExaAdmmBackend/ExaAdmmBackend.jl b/src/ExaAdmmBackend/ExaAdmmBackend.jl index 9f05101..81f2e67 100644 --- a/src/ExaAdmmBackend/ExaAdmmBackend.jl +++ b/src/ExaAdmmBackend/ExaAdmmBackend.jl @@ -6,6 +6,7 @@ import MPI using KernelAbstractions using ExaAdmm using ExaTron +using JLD const KA = KernelAbstractions mutable struct ModelProxAL{T,TD,TI,TM} <: ExaAdmm.AbstractOPFModel{T,TD,TI,TM} diff --git a/src/ExaAdmmBackend/proxal_admm_ka.jl b/src/ExaAdmmBackend/proxal_admm_ka.jl index b5b3eb9..16ccb99 100644 --- a/src/ExaAdmmBackend/proxal_admm_ka.jl +++ b/src/ExaAdmmBackend/proxal_admm_ka.jl @@ -1,23 +1,26 @@ @kernel function generator_kernel_two_level_proxal_ka(ngen::Int, gen_start::Int, u, xbar, z, l, rho, - pgmin, pgmax, - qgmin, qgmax, - smin, smax, s, + @Const(pgmin), @Const(pgmax), + @Const(qgmin), @Const(qgmax), + @Const(smin), @Const(smax), s, _A, _c) tx = @index(Local, Linear) I = @index(Group, Linear) + # grpsize = @groupsize + # range = @ndrange + # AMDGPU.@rocprintf "generator_kernel %s %s %s\n" ngen grpsize range + x = @localmem Float64 (2,) + xl = @localmem Float64 (2,) + xu = @localmem Float64 (2,) - n = 2 + A = @localmem Float64 (2,2) + c = @localmem Float64 (2,) if I <= ngen - x = @localmem Float64 (n,) - xl = @localmem Float64 (n,) - xu = @localmem Float64 (n,) + @synchronize - A = @localmem Float64 (n,n) - c = @localmem Float64 (n,) pg_idx = gen_start + 2*(I-1) qg_idx = gen_start + 2*(I-1) + 1 @@ -28,10 +31,10 @@ A_start = 4*(I-1) c_start = 2*(I-1) - if tx <= n - @inbounds begin - for j=1:n - A[tx,j] = _A[n*(j-1)+tx + A_start] + if tx <= 2 + # @inbounds begin + for j=1:2 + A[tx,j] = _A[2*(j-1)+tx + A_start] end c[tx] = _c[tx + c_start] @@ -39,24 +42,29 @@ A[1,1] += rho[pg_idx] c[1] += l[pg_idx] + rho[pg_idx]*(-xbar[pg_idx] + z[pg_idx]) end - end + # end end @synchronize - @inbounds begin - xl[1] = pgmin[I] - xu[1] = pgmax[I] - xl[2] = smin[I] - xu[2] = smax[I] - x[1] = min(xu[1], max(xl[1], u[pg_idx])) - x[2] = min(xu[2], max(xl[2], s[I])) + # @inbounds begin + if tx == 1 + xl[1] = pgmin[I] + xu[1] = pgmax[I] + xl[2] = smin[I] + xu[2] = smax[I] + x[1] = min(xu[1], max(xl[1], u[pg_idx])) + x[2] = min(xu[2], max(xl[2], s[I])) + end @synchronize - - status, minor_iter = ExaTron.ExaTronKAKernels.tron_qp_kernel(n, 500, 200, 1e-6, 1.0, x, xl, xu, A, c, tx) - - u[pg_idx] = x[1] - s[I] = x[2] - end + status, minor_iter = ExaAdmm.ExaTron.ExaTronKAKernels.tron_qp_kernel(2, 500, 200, 1e-6, 1.0, x, xl, xu, A, c, tx) + # AMDGPU.@rocprintf "atron_qp_kernel: %s\n" x[1] + @synchronize + if tx == 1 + u[pg_idx] = x[1] + s[I] = x[2] + end + @synchronize + # end end end @@ -67,16 +75,48 @@ function generator_kernel_two_level( ) where {AT, IAT} ngen = model.grid_data.ngen - - generator_kernel_two_level_proxal_ka(device, 32, 32*ngen)( - ngen, model.gen_start, + # println("Pgmin/Pgmax: $(model.grid_data.pgmin), $(model.grid_data.pgmax)") + # println("smin/smax: $(model.smin), $(model.smax)") + # println("device: $device") + # println("typeof: $(typeof(model.grid_data.pgmin)) $(typeof(model.smin))") + d = load("/lustre/orion/csc359/scratch/mschanen/git/milepost7/data.fld") + ngen = d["ngen"] + gen_start = d["model.gen_start"] + u = d["u |> Array"] |> ROCArray + xbar = d["xbar |> Array"] |> ROCArray + zu = d["zu |> Array"] |> ROCArray + lu = d["lu |> Array"] |> ROCArray + rho_u = d["rho_u |> Array"] |> ROCArray + pgmin = d["model.grid_data.pgmin |> Array"] |> ROCArray + pgmax = d["model.grid_data.pgmax |> Array"] |> ROCArray + qgmin = d["model.grid_data.qgmin |> Array"] |> ROCArray + qgmax = d["model.grid_data.qgmax |> Array"] |> ROCArray + smin = d["model.smin |> Array"] |> ROCArray + smax = d["model.smax |> Array"] |> ROCArray + s_curr = d["model.s_curr |> Array"] |> ROCArray + Q_ref = d["model.Q_ref |> Array"] |> ROCArray + c_ref = d["model.c_ref |> Array"] |> ROCArray + KA.synchronize(device) + generator_kernel_two_level_proxal_ka(ROCBackend(), 1, 1)( + ngen, gen_start, u, xbar, zu, lu, rho_u, - model.grid_data.pgmin, model.grid_data.pgmax, - model.grid_data.qgmin, model.grid_data.qgmax, - model.smin, model.smax, model.s_curr, - model.Q_ref, model.c_ref, - ndrange=(ngen,ngen) + pgmin, pgmax, + qgmin, qgmax, + smin, smax, s_curr, + Q_ref, c_ref, + #ndrange=(ngen,ngen) ) + # generator_kernel_two_level_proxal_ka(device, 1, 1)( + # ngen, model.gen_start, + # u, xbar, zu, lu, rho_u, + # model.grid_data.pgmin, model.grid_data.pgmax, + # model.grid_data.qgmin, model.grid_data.qgmax, + # model.smin, model.smax, model.s_curr, + # model.Q_ref, model.c_ref, + # ) KA.synchronize(device) + println("u: $u") + println("s: $(model.s_curr)") + println("Synchronize done") return 0.0 end diff --git a/src/ProxAL.jl b/src/ProxAL.jl index 38ef220..abb3ab0 100644 --- a/src/ProxAL.jl +++ b/src/ProxAL.jl @@ -12,6 +12,7 @@ using LinearAlgebra using SparseArrays using MPI using HDF5 +using JLD const KA = KernelAbstractions abstract type AbstractPrimalSolution end