From 5936e54256a5eeed4ea60a00722b907e9488922e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 6 Feb 2024 13:07:11 +0100 Subject: [PATCH 001/117] using AbstractGPs --- src/UncertaintyQuantification.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index fb011e114..56fc7a9cc 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -1,5 +1,6 @@ module UncertaintyQuantification +using AbstractGPs using Bootstrap using DataFrames using Dates From f75b5a5ea2c0f48658b7ae7c1ef2acea16d453c0 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 6 Feb 2024 13:07:40 +0100 Subject: [PATCH 002/117] GaussianProcessRegressor struct --- src/models/gaussianprocess.jl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 src/models/gaussianprocess.jl diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl new file mode 100644 index 000000000..2b1449c75 --- /dev/null +++ b/src/models/gaussianprocess.jl @@ -0,0 +1,31 @@ +""" + GaussianProcess(data::DataFrame, dependendVarName::Symbol, deg::Int, dim::Int) + +Creates a gaussian process prior .... + +# Examples +```jldoctest +julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); + +julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => true) +ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) +``` +""" +mutable struct GaussianProcessRegressor <: UQModel + gp::AbstractGP + y::Symbol + names::Vector{Symbol} + function GaussianProcessRegressor(gp::AbstractGP, data::DataFrame, output::Symbol) + # Choice for kernel and mean function is in gp, is that fine? + # Where to put output noise? 
+ names = propertynames(data[:, Not(output)]) + return new(gp, output, names) + end +end + +function fit!(gpr::GaussianProcessRegressor) + +end + + + From ac83936e81603bac76d4142b1d1766d904a2be35 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 7 Feb 2024 20:32:28 +0100 Subject: [PATCH 003/117] Using GaussianProcesses --- src/UncertaintyQuantification.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 56fc7a9cc..accaf3552 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -1,6 +1,5 @@ module UncertaintyQuantification -using AbstractGPs using Bootstrap using DataFrames using Dates @@ -10,6 +9,7 @@ using DynamicPolynomials using FastGaussQuadrature using FiniteDifferences using Formatting +using GaussianProcesses using KernelDensity using LinearAlgebra using Mustache From cf0be3d8504cc2f7f692ca7c4638418cb53481f4 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 7 Feb 2024 20:32:58 +0100 Subject: [PATCH 004/117] TODO: Proper normalization scheme --- src/models/gaussianprocess.jl | 56 ++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index 2b1449c75..65eff4c11 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -12,19 +12,55 @@ ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutativ ``` """ mutable struct GaussianProcessRegressor <: UQModel - gp::AbstractGP - y::Symbol - names::Vector{Symbol} - function GaussianProcessRegressor(gp::AbstractGP, data::DataFrame, output::Symbol) - # Choice for kernel and mean function is in gp, is that fine? - # Where to put output noise? 
- names = propertynames(data[:, Not(output)]) - return new(gp, output, names) - end + gp::GPBase + inputs::Union{Vector{<:UQInput}, Vector{Symbol}} + output::Symbol + input_normalizer::Union{Normalizer, Nothing} + output_normalizer::Union{Normalizer, Nothing} +end + +struct Normalizer + μ::Union{Real, Matrix{<:Real}} + σ::Union{Real, Matrix{<:Real}} end -function fit!(gpr::GaussianProcessRegressor) +normalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = (data .- normalizer.μ) ./ normalizer.σ +denormalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = data .* normalizer.σ .+ normalizer.μ + +function gaussianprocess( + df::DataFrame, + inputs::Vector{Symbol}, + output::Symbol, + kernel::Kernel, + mean::Mean=MeanZero(), + log_noise::Real=-2.0, + normalize_input::Bool=false, + normalize_output::Bool=false +) + X = Matrix(df[:, inputs])' + y = df[:, output] + + if normalize_input + normalizer_in = Normalizer(mean(X, dims=2), std(X, dims=2)) + X = normalize(X, normalizer_in) + else + normalizer_in = nothing + end + if normalize_output + normalizer_out = Normalizer(mean(y), std(y)) + y = normalize(y, normalizer_out) + else + normalizer_out = nothing + end + gp = GP(X, y, mean, kernel) + optimize!(gp) + + gp = GaussianProcess( + gp, inputs, output, size(X, 2), InputStandardizationGP(minimum(X), maximum(X)) + ) + + return gp, df end From 965a847f1bc5a18c1a6f4c9be23c90ee7060a29a Mon Sep 17 00:00:00 2001 From: Felix Date: Thu, 15 Feb 2024 13:48:17 +0100 Subject: [PATCH 005/117] Imports necessary for gps --- src/UncertaintyQuantification.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index accaf3552..4538aa956 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -13,6 +13,7 @@ using GaussianProcesses using KernelDensity using LinearAlgebra using Mustache +using Optim # needed for GPs to support other optimizers + settings using Primes using QuasiMonteCarlo using Random @@ -66,6 +67,7 @@ export ForwardFiniteDifferences export FractionalFactorial export FullFactorial export GaussianCopula +export GaussianProcessRegressor # do I need that to be exported? export GaussQuadrature export HaltonSampling export HermiteBasis @@ -101,10 +103,12 @@ export distribution_parameters export doe_samples export evaluate export evaluate! +export gaussianprocess export gradient export gradient_in_standard_normal_space export mean export multivariate_indices +export optimize_hyperparams! 
# Maybe rename this, so it is clear that this works on gaussianprocesses export polynomialchaos export probability_of_failure export qmc_samples @@ -130,6 +134,7 @@ include("solvers/solver.jl") include("solvers/extractor.jl") include("models/externalmodel.jl") +include("models/gaussianprocess.jl") include("models/model.jl") include("models/polyharmonicspline.jl") include("models/responsesurface.jl") From 7207f4cfde251fed303f6a601b21a2de0451aead Mon Sep 17 00:00:00 2001 From: Felix Date: Thu, 15 Feb 2024 13:50:37 +0100 Subject: [PATCH 006/117] Testing file, delete later --- demo/metamodels/gp_tests.jl | 55 +++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 demo/metamodels/gp_tests.jl diff --git a/demo/metamodels/gp_tests.jl b/demo/metamodels/gp_tests.jl new file mode 100644 index 000000000..df8faa712 --- /dev/null +++ b/demo/metamodels/gp_tests.jl @@ -0,0 +1,55 @@ +using UncertaintyQuantification +using GaussianProcesses # do we reexport for mean and kernel functions etc.? +using DataFrames +using Random +using Statistics + +Random.seed!(20140430) +# Training data +n=10; #number of training points +x = 2π * rand(n); #predictors +y = sin.(x) + 0.05*randn(n) .+ 1e3; #regressors + +#Select mean and covariance function +mZero = MeanZero() #Zero mean function +mConst = MeanConst(1.0) +kern = SE(0.0,0.0) #Sqaured exponential kernel (note that hyperparameters are on the log scale) + +inputs = :x +output = :y +df = DataFrame(inputs => x) +df[!, output] = y + +logObsNoise = -1.0 # log standard deviation of observation noise (this is optional) + +gp, = gaussianprocess(df, [inputs], output, kern, mZero, logObsNoise) + +# gp = GP(x,y,mConst,kern,logObsNoise) #Fit the GP +# gp_scaled = GP(x,y_scaled,mZero,kern,logObsNoise-log(std(y))) + +# gp = GP(x,y,mConst,kern) #Fit the GP +# gp_scaled = GP(x,y_scaled,mZero,kern, -2-log(std(y))) + +# μ, σ² = predict_y(gp,range(0,stop=2π,length=100)) +# a, b = predict_y(gp_scaled,range(0,stop=2π,length=100)) +# a_ = a .* std(y) .+ mean(y) +# b_ = b .* std(y) + +using Optim + +optimize_hyperparams!(gp; method=ConjugateGradient(), noise=false) # Optimise the hyperparameters +# optimize!(gp_scaled; method=ConjugateGradient(), noise=false) + +# plot(gp; legend=false, fmt=:png) #Plot the GP after the hyperparameters have been optimised + +# optimize!(gp; kern = false) # Don't optimize kernel hyperparameters +# optimize!(gp; kernbounds = [[-1, -1], [1, 1]]) # Optimize the kernel parameters in a box with lower bounds [-1, -1] and upper bounds [1, 1] + +# using Plots #Load Plots.jl package + +# scatter(x, y) +# plot!(range(0,stop=2π,length=100), μ, ribbon=σ²) +# plot!(range(0,stop=2π,length=100), a_, ribbon=b_) + +# plot(gp; xlabel="x", ylabel="y", title="Gaussian process", legend=false, fmt=:png) # Plot the GP +# plot(gp_scaled; xlabel="x", ylabel="y", title="Gaussian process", legend=false, fmt=:png) From c37df3ed354fd7d15ae10c1b8b3b38ef9c16fc00 Mon Sep 17 00:00:00 2001 From: Felix Date: Thu, 15 Feb 2024 13:51:16 +0100 Subject: [PATCH 007/117] Split gp fit and hyperparameter optimization --- src/models/gaussianprocess.jl | 55 ++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index 65eff4c11..a1e936cd6 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -11,6 +11,14 @@ julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => tr ResponseSurface([0.483333, -0.238636, 1.01894], 
:y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) ``` """ +struct Normalizer # Maybe move this somewhere else... + μ::Union{Real, Matrix{<:Real}} + σ::Union{Real, Matrix{<:Real}} +end + +normalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = (data .- normalizer.μ) ./ normalizer.σ +denormalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = data .* normalizer.σ .+ normalizer.μ + mutable struct GaussianProcessRegressor <: UQModel gp::GPBase inputs::Union{Vector{<:UQInput}, Vector{Symbol}} @@ -19,20 +27,12 @@ mutable struct GaussianProcessRegressor <: UQModel output_normalizer::Union{Normalizer, Nothing} end -struct Normalizer - μ::Union{Real, Matrix{<:Real}} - σ::Union{Real, Matrix{<:Real}} -end - -normalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = (data .- normalizer.μ) ./ normalizer.σ -denormalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = data .* normalizer.σ .+ normalizer.μ - function gaussianprocess( df::DataFrame, inputs::Vector{Symbol}, output::Symbol, kernel::Kernel, - mean::Mean=MeanZero(), + mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, normalize_input::Bool=false, normalize_output::Bool=false @@ -41,27 +41,42 @@ function gaussianprocess( y = df[:, output] if normalize_input - normalizer_in = Normalizer(mean(X, dims=2), std(X, dims=2)) - X = normalize(X, normalizer_in) + input_normalizer = Normalizer(mean(X, dims=2), std(X, dims=2)) + X = normalize(X, input_normalizer) else - normalizer_in = nothing + input_normalizer = nothing end if normalize_output - normalizer_out = Normalizer(mean(y), std(y)) - y = normalize(y, normalizer_out) + output_normalizer = Normalizer(mean(y), std(y)) + y = normalize(y, output_normalizer) + log_noise -= log(output_normalizer.σ) else - normalizer_out = nothing + output_normalizer = nothing end - gp = GP(X, y, mean, kernel) - optimize!(gp) + gp = GP(X, y, mean, kernel, log_noise) - gp = GaussianProcess( - gp, inputs, output, size(X, 2), InputStandardizationGP(minimum(X), maximum(X)) - ) + gp = GaussianProcessRegressor( + gp, inputs, output, + input_normalizer, output_normalizer + ) return gp, df end +# Wrapper for optimize! method from GaussianProcesses.jl +function optimize_hyperparams!(gpr::GaussianProcessRegressor, args...; method = LBFGS(), + domean::Bool = true, kern::Bool = true, noise::Bool = true, + lik::Bool = true, meanbounds = nothing, kernbounds = nothing, + noisebounds = nothing, likbounds = nothing, kwargs... +) + + optimize!(gpr.gp, args...; method = method, + domean=domean, kern=kern, noise=noise, lik=lik, + meanbounds=meanbounds, kernbounds=kernbounds, + noisebounds=noisebounds, likbounds=likbounds, + kwargs...) 
+end + From 131cfa40adc0a2934de56ff8c7cade3378b035ce Mon Sep 17 00:00:00 2001 From: Felix Date: Thu, 15 Feb 2024 13:52:00 +0100 Subject: [PATCH 008/117] Additional packages for gaussian-processes --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index b5b1357b9..6d6603bb9 100644 --- a/Project.toml +++ b/Project.toml @@ -15,9 +15,11 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" +GaussianProcesses = "891a1506-143c-57d2-908e-e1f8e92e6de9" KernelDensity = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Mustache = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" From f21cdd50b2dc6417861b2d8b280ef78da02bddfd Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 15 Feb 2024 17:22:39 +0100 Subject: [PATCH 009/117] StatsBase normalization and Optimizer struct --- src/models/gaussianprocess.jl | 69 ++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index a1e936cd6..192290829 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -11,25 +11,56 @@ julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => tr ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) ``` """ -struct Normalizer # Maybe move this somewhere else... 
- μ::Union{Real, Matrix{<:Real}} - σ::Union{Real, Matrix{<:Real}} -end - -normalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = (data .- normalizer.μ) ./ normalizer.σ -denormalize(data::Union{Vector{Real}, Matrix{<:Real}}, normalizer::Normalizer) = data .* normalizer.σ .+ normalizer.μ - mutable struct GaussianProcessRegressor <: UQModel gp::GPBase inputs::Union{Vector{<:UQInput}, Vector{Symbol}} output::Symbol - input_normalizer::Union{Normalizer, Nothing} - output_normalizer::Union{Normalizer, Nothing} + input_normalizer::Union{ZScoreTransform, Nothing} + output_normalizer::Union{ZScoreTransform, Nothing} end +function normalize!( + input::Union{Vector{Real}, Matrix{<:Real}}, + output::Vector{Real}, + normalize_input::Bool, + normalize_output::Bool, + log_noise::Real +) + if normalize_input + input_normalizer = fit(ZScoreTransform, input) + input[:] = StatsBase.transform(input_normalizer, input) + else + input_normalizer = nothing + end + + if normalize_output + output_normalizer = fit(ZScoreTransform, output) + output[:] = StatsBase.transform(output_normalizer, output) + log_noise -= log(output_normalizer.scale[1]) + else + output_normalizer = nothing + end + + return input_normalizer, output_normalizer, log_noise +end + +struct Optimizer + optimizer + opt_kwargs::Dict + hyperparams::Dict + bounds::Dict +end + +Optimizer() = Optimizer( + LBFGS(), + Dict(), + Dict(:domean => true, :kern => true, :noise => true, :lik => true), + Dict(:meanbounds => nothing, :kernbounds => nothing, :noisebounds => nothing, :likbounds => nothing) + ) + function gaussianprocess( df::DataFrame, - inputs::Vector{Symbol}, + input::Vector{Symbol}, output::Symbol, kernel::Kernel, mean::GaussianProcesses.Mean=MeanZero(), @@ -37,22 +68,10 @@ function gaussianprocess( normalize_input::Bool=false, normalize_output::Bool=false ) - X = Matrix(df[:, inputs])' + x = Matrix(df[:, input])' y = df[:, output] - if normalize_input - input_normalizer = Normalizer(mean(X, dims=2), std(X, dims=2)) - X = normalize(X, input_normalizer) - else - input_normalizer = nothing - end - if normalize_output - output_normalizer = Normalizer(mean(y), std(y)) - y = normalize(y, output_normalizer) - log_noise -= log(output_normalizer.σ) - else - output_normalizer = nothing - end + input_normalizer, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) gp = GP(X, y, mean, kernel, log_noise) From fdca54c8084fead71b37bc33b2978f067baf81de Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 29 Feb 2024 18:03:09 +0100 Subject: [PATCH 010/117] Optimizer in gp function, implement evaluate method --- src/models/gaussianprocess.jl | 129 +++++++++++++++++++++++++++++----- 1 file changed, 111 insertions(+), 18 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index 192290829..f393e4691 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -20,8 +20,8 @@ mutable struct GaussianProcessRegressor <: UQModel end function normalize!( - input::Union{Vector{Real}, Matrix{<:Real}}, - output::Vector{Real}, + input::Union{Vector{<:Real}, Matrix{<:Real}}, + output::Vector{<:Real}, normalize_input::Bool, normalize_output::Bool, log_noise::Real @@ -45,12 +45,17 @@ function normalize!( end struct Optimizer - optimizer - opt_kwargs::Dict + # maybe give one default and allow JuMP structs + method::Union{Optim.LBFGS, Optim.ConjugateGradient} # not sure how or even if to support multiple solvers + optim_options::Dict # maybe there is a better option 
than using dicts for this hyperparams::Dict bounds::Dict end +struct ExperimentalDesign # not sure about the name + sim::AbstractMonteCarlo # could also allow doe +end + Optimizer() = Optimizer( LBFGS(), Dict(), @@ -65,37 +70,125 @@ function gaussianprocess( kernel::Kernel, mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), normalize_input::Bool=false, normalize_output::Bool=false ) - x = Matrix(df[:, input])' + x = copy(Matrix(df[:, input])') y = df[:, output] - input_normalizer, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) - gp = GP(X, y, mean, kernel, log_noise) + gp = GP(x, y, mean, kernel, log_noise) + if !isnothing(optimizer) + optimize!(gp; + method=optimizer.method, + optimizer.hyperparams..., + optimizer.bounds..., + optimizer.optim_options... + ) + end gp = GaussianProcessRegressor( - gp, inputs, output, + gp, input, output, input_normalizer, output_normalizer ) return gp, df end -# Wrapper for optimize! method from GaussianProcesses.jl -function optimize_hyperparams!(gpr::GaussianProcessRegressor, args...; method = LBFGS(), - domean::Bool = true, kern::Bool = true, noise::Bool = true, - lik::Bool = true, meanbounds = nothing, kernbounds = nothing, - noisebounds = nothing, likbounds = nothing, kwargs... +function polynomialchaos( + inputs::Vector{<:UQInput}, + model::Vector{<:UQModel}, + Ψ::PolynomialChaosBasis, + output::Symbol, + _::GaussQuadrature, ) + random_inputs = filter(i -> isa(i, RandomUQInput), inputs) + deterministic_inputs = filter(i -> isa(i, DeterministicUQInput), inputs) + random_names = names(random_inputs) + + nodes = mapreduce( + n -> [n...]', vcat, Iterators.product(quadrature_nodes.(Ψ.p + 1, Ψ.bases)...) + ) + weights = map(prod, Iterators.product(quadrature_weights.(Ψ.p + 1, Ψ.bases)...)) - optimize!(gpr.gp, args...; method = method, - domean=domean, kern=kern, noise=noise, lik=lik, - meanbounds=meanbounds, kernbounds=kernbounds, - noisebounds=noisebounds, likbounds=likbounds, - kwargs...) + samples = DataFrame(map_from_bases(Ψ, nodes), random_names) + to_physical_space!(random_inputs, samples) + + if !isempty(deterministic_inputs) + samples = hcat(samples, sample(deterministic_inputs, size(nodes, 1))) + end + + evaluate!(model, samples) + + y = mapreduce( + (x, w, f) -> f * w * evaluate(Ψ, collect(x)), + +, + eachrow(nodes), + weights, + samples[:, output], + ) + + return PolynomialChaosExpansion(y, Ψ, output, random_inputs), samples end +function polynomialchaos( + inputs::UQInput, + model::Vector{<:UQModel}, + Ψ::PolynomialChaosBasis, + output::Symbol, + gq::GaussQuadrature, +) + return polynomialchaos([inputs], model, Ψ, output, gq) +end +function polynomialchaos( + inputs::Vector{<:UQInput}, + model::UQModel, + Ψ::PolynomialChaosBasis, + output::Symbol, + gq::GaussQuadrature, +) + return polynomialchaos(inputs, [model], Ψ, output, gq) +end + +function polynomialchaos( + inputs::UQInput, + model::UQModel, + Ψ::PolynomialChaosBasis, + output::Symbol, + gq::GaussQuadrature, +) + return polynomialchaos([inputs], [model], Ψ, output, gq) +end + +# what should this return? 
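For orientation, a minimal usage sketch of the interface this commit is building, pairing the DataFrame-based constructor above with the evaluate! method that follows. The frame df, the columns :x and :y, and the SE kernel settings are illustrative only, not part of the patch:

    # assumes the demo's `using UncertaintyQuantification, GaussianProcesses, DataFrames`
    kern = SE(0.0, 0.0)                            # squared-exponential kernel, log-scale hyperparameters
    gp, _ = gaussianprocess(df, [:x], :y, kern)    # fit, then run the default Optimizer() on the hyperparameters
    test = DataFrame(; x=range(0, 2π; length=50))
    evaluate!(gp, test)                            # appends :y_mean and :y_var columns to test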
+function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at inputs + data = Matrix(df[:, names(gpr.inputs)])' + if !isnothing(gpr.input_normalizer) + μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_normalizer, data)) + else + μ, Σ = predict_y(gpr.gp, data) + end + + if !isnothing(grp.output_normalizer) + μ[:] = μ .* gpr.output_normalizer.scale[1] .+ gpr.output_normalizer.mean[1] + Σ[:] = Σ .* gpr.output_normalizer.scale[1]^2 + end + + df[!, Symbol(gpr.output, "_mean")] = μ + df[!, Symbol(gpr.output, "_var")] = Σ + return nothing +end + +function sample(pce::PolynomialChaosExpansion, n::Integer) + samps = hcat(sample.(n, pce.Ψ.bases)...) + out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) + + samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.inputs)) + to_physical_space!(pce.inputs, samps) + + samps[!, pce.output] = out + return samps +end From 8db025f5c9230fcbb4ebc20f420c5d1c454a74d1 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 4 Mar 2024 11:55:53 +0100 Subject: [PATCH 011/117] Gaussianprocess from input, model and output --- src/models/gaussianprocess.jl | 127 +++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 47 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index f393e4691..42f475efd 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -13,7 +13,7 @@ ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutativ """ mutable struct GaussianProcessRegressor <: UQModel gp::GPBase - inputs::Union{Vector{<:UQInput}, Vector{Symbol}} + input::Union{Vector{<:UQInput}, Vector{Symbol}} output::Symbol input_normalizer::Union{ZScoreTransform, Nothing} output_normalizer::Union{ZScoreTransform, Nothing} @@ -46,7 +46,7 @@ end struct Optimizer # maybe give one default and allow JuMP structs - method::Union{Optim.LBFGS, Optim.ConjugateGradient} # not sure how or even if to support multiple solvers + method::Union{Optim.LBFGS, Optim.ConjugateGradient} # not sure how or even if to support multiple solvers (Matlab uses QuasiNewton default) optim_options::Dict # maybe there is a better option than using dicts for this hyperparams::Dict bounds::Dict @@ -96,75 +96,108 @@ function gaussianprocess( return gp, df end -function polynomialchaos( - inputs::Vector{<:UQInput}, +function gaussianprocess( + input::Vector{<:UQInput}, model::Vector{<:UQModel}, - Ψ::PolynomialChaosBasis, output::Symbol, - _::GaussQuadrature, + ed::ExperimentalDesign, + kernel::Kernel, + mean::GaussianProcesses.Mean=MeanZero(), + log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), + normalize_input::Bool=false, + normalize_output::Bool=false ) - random_inputs = filter(i -> isa(i, RandomUQInput), inputs) - deterministic_inputs = filter(i -> isa(i, DeterministicUQInput), inputs) - random_names = names(random_inputs) - - nodes = mapreduce( - n -> [n...]', vcat, Iterators.product(quadrature_nodes.(Ψ.p + 1, Ψ.bases)...) 
- ) - weights = map(prod, Iterators.product(quadrature_weights.(Ψ.p + 1, Ψ.bases)...)) + samples = sample(input, ed.sim) + evaluate!(model, samples) - samples = DataFrame(map_from_bases(Ψ, nodes), random_names) - to_physical_space!(random_inputs, samples) + random_input = filter(i -> isa(i, RandomUQInput), input) + random_names = names(random_input) - if !isempty(deterministic_inputs) - samples = hcat(samples, sample(deterministic_inputs, size(nodes, 1))) + to_standard_normal_space!(random_input, samples) # not sure if this is save to do in every case + x = copy(Matrix(samples[:, random_names])') + y = df[:, output] + _, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) # do not need input normalizer here + + gp = GP(x, y, mean, kernel, log_noise) + if !isnothing(optimizer) + optimize!(gp; + method=optimizer.method, + optimizer.hyperparams..., + optimizer.bounds..., + optimizer.optim_options... + ) end - evaluate!(model, samples) - - y = mapreduce( - (x, w, f) -> f * w * evaluate(Ψ, collect(x)), - +, - eachrow(nodes), - weights, - samples[:, output], - ) + gp = GaussianProcessRegressor( + gp, input, output, + _, output_normalizer + ) + to_physical_space!(random_input, samples) - return PolynomialChaosExpansion(y, Ψ, output, random_inputs), samples + return gp, samples end -function polynomialchaos( - inputs::UQInput, +function gaussianprocess( + input::UQInput, model::Vector{<:UQModel}, - Ψ::PolynomialChaosBasis, output::Symbol, - gq::GaussQuadrature, + ed::ExperimentalDesign, + kernel::Kernel, + mean::GaussianProcesses.Mean=MeanZero(), + log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), + normalize_input::Bool=false, + normalize_output::Bool=false ) - return polynomialchaos([inputs], model, Ψ, output, gq) + return gaussianprocess( + [input], model, output, + ed, kernel, mean, log_noise, + optimizer, normalize_input, normalize_output + ) end -function polynomialchaos( - inputs::Vector{<:UQInput}, +function gaussianprocess( + input::Vector{<:UQInput}, model::UQModel, - Ψ::PolynomialChaosBasis, output::Symbol, - gq::GaussQuadrature, + ed::ExperimentalDesign, + kernel::Kernel, + mean::GaussianProcesses.Mean=MeanZero(), + log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), + normalize_input::Bool=false, + normalize_output::Bool=false ) - return polynomialchaos(inputs, [model], Ψ, output, gq) + return gaussianprocess( + input, [model], output, + ed, kernel, mean, log_noise, + optimizer, normalize_input, normalize_output + ) end -function polynomialchaos( - inputs::UQInput, +function gaussianprocess( + input::UQInput, model::UQModel, - Ψ::PolynomialChaosBasis, output::Symbol, - gq::GaussQuadrature, + ed::ExperimentalDesign, + kernel::Kernel, + mean::GaussianProcesses.Mean=MeanZero(), + log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), + normalize_input::Bool=false, + normalize_output::Bool=false ) - return polynomialchaos([inputs], [model], Ψ, output, gq) + return gaussianprocess( + [input], [model], output, + ed, kernel, mean, log_noise, + optimizer, normalize_input, normalize_output + ) end # what should this return? 
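The methods added above construct the surrogate directly from UQInput objects and a model: the training set is drawn internally via sample(input, ed.sim), evaluated, and mapped to standard normal space before fitting. A sketch of that call path, with placeholder input, model and design (all names illustrative):

    # assumes `using UncertaintyQuantification, GaussianProcesses`
    x = RandomVariable(Normal(), :x)
    m = Model(df -> df.x .^ 2, :y)
    design = ExperimentalDesign(MonteCarlo(100))
    gp, samples = gaussianprocess([x], m, :y, design, SE(0.0, 0.0))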
-function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at inputs - data = Matrix(df[:, names(gpr.inputs)])' +function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at input + data = Matrix(df[:, names(gpr.input)])' if !isnothing(gpr.input_normalizer) μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_normalizer, data)) else @@ -185,8 +218,8 @@ function sample(pce::PolynomialChaosExpansion, n::Integer) samps = hcat(sample.(n, pce.Ψ.bases)...) out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) - samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.inputs)) - to_physical_space!(pce.inputs, samps) + samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) + to_physical_space!(pce.input, samps) samps[!, pce.output] = out return samps From 88777e5137011100e13a8fa18793bdd7bd8a64a2 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 5 Mar 2024 15:18:48 +0100 Subject: [PATCH 012/117] Removed input normalization where UQInput is given --- src/models/gaussianprocess.jl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index 42f475efd..a7b735b2f 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -105,7 +105,6 @@ function gaussianprocess( mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_input::Bool=false, normalize_output::Bool=false ) samples = sample(input, ed.sim) @@ -117,7 +116,7 @@ function gaussianprocess( to_standard_normal_space!(random_input, samples) # not sure if this is save to do in every case x = copy(Matrix(samples[:, random_names])') y = df[:, output] - _, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) # do not need input normalizer here + _, output_normalizer, log_noise = normalize!(x, y, false, normalize_output, log_noise) # do not need input normalizer here gp = GP(x, y, mean, kernel, log_noise) if !isnothing(optimizer) @@ -147,13 +146,12 @@ function gaussianprocess( mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_input::Bool=false, normalize_output::Bool=false ) return gaussianprocess( [input], model, output, ed, kernel, mean, log_noise, - optimizer, normalize_input, normalize_output + optimizer, normalize_output ) end @@ -166,13 +164,12 @@ function gaussianprocess( mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_input::Bool=false, normalize_output::Bool=false ) return gaussianprocess( input, [model], output, ed, kernel, mean, log_noise, - optimizer, normalize_input, normalize_output + optimizer, normalize_output ) end @@ -185,13 +182,12 @@ function gaussianprocess( mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_input::Bool=false, normalize_output::Bool=false ) return gaussianprocess( [input], [model], output, ed, kernel, mean, log_noise, - optimizer, normalize_input, normalize_output + optimizer, normalize_output ) end From 5eadb181cb7f47a276f386ee0e17529bd049964f Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 5 Mar 2024 15:19:46 +0100 Subject: [PATCH 013/117] Ignoring sample method for now --- src/models/gaussianprocess.jl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 
deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index a7b735b2f..aba71fc9b 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -210,14 +210,15 @@ function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now give return nothing end -function sample(pce::PolynomialChaosExpansion, n::Integer) - samps = hcat(sample.(n, pce.Ψ.bases)...) - out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) +# Not sure how to design a similar function for gps, or if this is even desirable +# function sample(pce::PolynomialChaosExpansion, n::Integer) +# samps = hcat(sample.(n, pce.Ψ.bases)...) +# out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) - samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) - to_physical_space!(pce.input, samps) +# samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) +# to_physical_space!(pce.input, samps) - samps[!, pce.output] = out - return samps -end +# samps[!, pce.output] = out +# return samps +# end From ec975e46b6606a39420e57c7b09bdb519934666f Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Mon, 13 May 2024 15:01:15 +0200 Subject: [PATCH 014/117] Suggestion for naming of normalize --- src/models/gaussianprocess.jl | 49 +++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index 192290829..b2bb61358 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -19,9 +19,9 @@ mutable struct GaussianProcessRegressor <: UQModel output_normalizer::Union{ZScoreTransform, Nothing} end -function normalize!( - input::Union{Vector{Real}, Matrix{<:Real}}, - output::Vector{Real}, +function normalize!( # maybe different name as this is only supposed to be used in the GP context + input::Union{Vector{<:Real}, AbstractMatrix{<:Real}}, + output::Vector{<:Real}, normalize_input::Bool, normalize_output::Bool, log_noise::Real @@ -44,11 +44,12 @@ function normalize!( return input_normalizer, output_normalizer, log_noise end -struct Optimizer +struct Optimizer # there is probably a better way to design this optimizer opt_kwargs::Dict hyperparams::Dict bounds::Dict + # should I add number of optimizer runs? end Optimizer() = Optimizer( @@ -65,6 +66,7 @@ function gaussianprocess( kernel::Kernel, mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, + optimizer::Union{Optimizer, Nothing}=Optimizer(), # there is probably a better way to design this normalize_input::Bool=false, normalize_output::Bool=false ) @@ -73,29 +75,38 @@ function gaussianprocess( input_normalizer, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) - gp = GP(X, y, mean, kernel, log_noise) + gp = GP(x, y, mean, kernel, log_noise) + + if !isnothing(optimizer) + optimize!(gp; + method=optimizer.optimizer, + optimizer.hyperparams..., + optimizer.bounds..., + optimizer.opt_kwargs... + ) + end gp = GaussianProcessRegressor( - gp, inputs, output, + gp, input, output, input_normalizer, output_normalizer ) - return gp, df + return gp, df # this method does not really need to return df end # Wrapper for optimize! 
method from GaussianProcesses.jl -function optimize_hyperparams!(gpr::GaussianProcessRegressor, args...; method = LBFGS(), - domean::Bool = true, kern::Bool = true, noise::Bool = true, - lik::Bool = true, meanbounds = nothing, kernbounds = nothing, - noisebounds = nothing, likbounds = nothing, kwargs... -) - - optimize!(gpr.gp, args...; method = method, - domean=domean, kern=kern, noise=noise, lik=lik, - meanbounds=meanbounds, kernbounds=kernbounds, - noisebounds=noisebounds, likbounds=likbounds, - kwargs...) -end +# function optimize_hyperparams!(gpr::GaussianProcessRegressor, args...; method = LBFGS(), +# domean::Bool = true, kern::Bool = true, noise::Bool = true, +# lik::Bool = true, meanbounds = nothing, kernbounds = nothing, +# noisebounds = nothing, likbounds = nothing, kwargs... +# ) + +# optimize!(gpr.gp, args...; method = method, +# domean=domean, kern=kern, noise=noise, lik=lik, +# meanbounds=meanbounds, kernbounds=kernbounds, +# noisebounds=noisebounds, likbounds=likbounds, +# kwargs...) +# end From b62b02153e489db4a19cec836d526e90d6f8120e Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Mon, 13 May 2024 15:45:20 +0200 Subject: [PATCH 015/117] Accepted incoming changes --- src/models/gaussianprocess.jl | 41 ----------------------------------- 1 file changed, 41 deletions(-) diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl index ad997f644..1b0be8d71 100644 --- a/src/models/gaussianprocess.jl +++ b/src/models/gaussianprocess.jl @@ -19,13 +19,8 @@ mutable struct GaussianProcessRegressor <: UQModel output_normalizer::Union{ZScoreTransform, Nothing} end -<<<<<<< HEAD -function normalize!( # maybe different name as this is only supposed to be used in the GP context - input::Union{Vector{<:Real}, AbstractMatrix{<:Real}}, -======= function normalize!( input::Union{Vector{<:Real}, Matrix{<:Real}}, ->>>>>>> 5eadb181cb7f47a276f386ee0e17529bd049964f output::Vector{<:Real}, normalize_input::Bool, normalize_output::Bool, @@ -49,16 +44,10 @@ function normalize!( return input_normalizer, output_normalizer, log_noise end -<<<<<<< HEAD -struct Optimizer # there is probably a better way to design this - optimizer - opt_kwargs::Dict -======= struct Optimizer # maybe give one default and allow JuMP structs method::Union{Optim.LBFGS, Optim.ConjugateGradient} # not sure how or even if to support multiple solvers (Matlab uses QuasiNewton default) optim_options::Dict # maybe there is a better option than using dicts for this ->>>>>>> 5eadb181cb7f47a276f386ee0e17529bd049964f hyperparams::Dict bounds::Dict # should I add number of optimizer runs? @@ -82,11 +71,7 @@ function gaussianprocess( kernel::Kernel, mean::GaussianProcesses.Mean=MeanZero(), log_noise::Real=-2.0, -<<<<<<< HEAD - optimizer::Union{Optimizer, Nothing}=Optimizer(), # there is probably a better way to design this -======= optimizer::Union{Optimizer, Nothing}=Optimizer(), ->>>>>>> 5eadb181cb7f47a276f386ee0e17529bd049964f normalize_input::Bool=false, normalize_output::Bool=false ) @@ -95,22 +80,12 @@ function gaussianprocess( input_normalizer, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) gp = GP(x, y, mean, kernel, log_noise) -<<<<<<< HEAD - - if !isnothing(optimizer) - optimize!(gp; - method=optimizer.optimizer, - optimizer.hyperparams..., - optimizer.bounds..., - optimizer.opt_kwargs... 
-======= if !isnothing(optimizer) optimize!(gp; method=optimizer.method, optimizer.hyperparams..., optimizer.bounds..., optimizer.optim_options... ->>>>>>> 5eadb181cb7f47a276f386ee0e17529bd049964f ) end @@ -122,21 +97,6 @@ function gaussianprocess( return gp, df # this method does not really need to return df end -<<<<<<< HEAD -# Wrapper for optimize! method from GaussianProcesses.jl -# function optimize_hyperparams!(gpr::GaussianProcessRegressor, args...; method = LBFGS(), -# domean::Bool = true, kern::Bool = true, noise::Bool = true, -# lik::Bool = true, meanbounds = nothing, kernbounds = nothing, -# noisebounds = nothing, likbounds = nothing, kwargs... -# ) - -# optimize!(gpr.gp, args...; method = method, -# domean=domean, kern=kern, noise=noise, lik=lik, -# meanbounds=meanbounds, kernbounds=kernbounds, -# noisebounds=noisebounds, likbounds=likbounds, -# kwargs...) -# end -======= function gaussianprocess( input::Vector{<:UQInput}, model::Vector{<:UQModel}, @@ -177,7 +137,6 @@ function gaussianprocess( return gp, samples end ->>>>>>> 5eadb181cb7f47a276f386ee0e17529bd049964f function gaussianprocess( input::UQInput, From efcc4faad85bbe3528a9ecce057cd4e02b250421 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 24 Sep 2024 11:05:55 +0200 Subject: [PATCH 016/117] Remove dependecies on GaussianProcesses.jl --- src/UncertaintyQuantification.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index ddbdaad00..288e11957 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -6,11 +6,9 @@ using DataFrames using Dates using Dierckx using Distributed -# using DynamicPolynomials using FastGaussQuadrature using FiniteDifferences using Formatting -using GaussianProcesses using KernelDensity using LinearAlgebra using MeshAdaptiveDirectSearch @@ -87,7 +85,6 @@ export ForwardFiniteDifferences export FractionalFactorial export FullFactorial export GaussianCopula -export GaussianProcessRegressor # do I need that to be exported? export GaussQuadrature export HaltonSampling export HermiteBasis @@ -129,12 +126,10 @@ export distribution_parameters export doe_samples export evaluate export evaluate! -export gaussianprocess export gradient export gradient_in_standard_normal_space export mean export multivariate_indices -export optimize_hyperparams! # Maybe rename this, so it is clear that this works on gaussianprocesses export polynomialchaos export probability_of_failure export propagate_intervals! @@ -167,7 +162,6 @@ include("solvers/extractor.jl") include("hpc/slurm.jl") include("models/externalmodel.jl") -include("models/gaussianprocess.jl") include("models/model.jl") include("models/imprecise/propagation.jl") include("models/polyharmonicspline.jl") From d63cde95df069c3760850dec863e77fdecf3943c Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:47:59 +0200 Subject: [PATCH 017/117] First working version --- src/models/gp/gaussianprocess.jl | 209 +++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 src/models/gp/gaussianprocess.jl diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl new file mode 100644 index 000000000..a13ddb2d5 --- /dev/null +++ b/src/models/gp/gaussianprocess.jl @@ -0,0 +1,209 @@ +""" + GaussianProcess(data::DataFrame, dependendVarName::Symbol, deg::Int, dim::Int) + +Creates a gaussian process prior .... 
+ +# Examples +```jldoctest +julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); + +julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => true) +ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) +``` +""" +struct GaussianProcessRegressor <: UQModel + gp::AbstractGPs.AbstractGP + input::Union{Vector{<:UQInput}, Vector{Symbol}} + output::Symbol + data::DataFrame +end + +default_optimizer = LBFGS() +NoiseTypes = Union{ + ParameterHandling.Positive, + ParameterHandling.Bounded, + ParameterHandling.Fixed + } +default_mean() = ZeroMean() + +function normalize!( + input::Union{Vector{<:Real}, Matrix{<:Real}}, + output::Vector{<:Real}, + normalize_input::Bool, + normalize_output::Bool, + log_noise::Real +) + if normalize_input + input_normalizer = fit(ZScoreTransform, input) + input[:] = StatsBase.transform(input_normalizer, input) + else + input_normalizer = nothing + end + + if normalize_output + output_normalizer = fit(ZScoreTransform, output) + output[:] = StatsBase.transform(output_normalizer, output) + log_noise -= log(output_normalizer.scale[1]) + else + output_normalizer = nothing + end + + return input_normalizer, output_normalizer, log_noise +end + +struct ExperimentalDesign # not sure about the name + sim::AbstractMonteCarlo # could also allow doe +end + +function logml(θ, input, output, mean_f, kernel_f) + gp = GP( + mean_f(ParameterHandling.value(θ.mean)), + kernel_f(ParameterHandling.value(θ.kernel)) + ) + f = gp( + input, + ParameterHandling.value(θ.noise)[1]^2 # same as in gaussianprocess... + ) + return -logpdf(f, output) +end + +function maximize_logml(logml, θ, input, output, mean_f, kernel_f; optimizer, maxiter=1_000) + options = Optim.Options(; iterations=maxiter, show_trace=true) + + θ_flat, unflatten = ParameterHandling.value_flatten(θ) + + ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations + function fg!(F, G, x) + if F !== nothing && G !== nothing + val, grad = Zygote.withgradient( + x -> logml(unflatten(x), input, output, mean_f, kernel_f), + x + ) + G .= only(grad) + return val + elseif G !== nothing + grad = Zygote.gradient( + x -> logml(unflatten(x), input, output, mean_f, kernel_f), + x + ) + G .= only(grad) + return nothing + elseif F !== nothing + return logml(unflatten(x), input, output, mean_f, kernel_f) + end + end + + result = optimize(Optim.only_fg!(fg!), θ_flat, optimizer, options; inplace=false) + + return unflatten(result.minimizer), result +end + +function gaussianprocess( + inputs::Vector{<:UQInput}, + model::UQModel, + output::Symbol, + kernel_f::Function, + mean_f::Function, # should provide a default mean + kernel_params::NamedTuple, # could be more specific than NamedTuple + mean_params::NamedTuple, + noise::NamedTuple, # how to do default value? 
(=positive(exp(-2.0))) + exp_design::ExperimentalDesign, + optimizer::Union{Optim.AbstractOptimizer, Nothing}=default_optimizer +) + samples = sample(inputs, exp_design.sim) # need to be able to pass experimental design + evaluate!(model, samples) + + random_inputs = filter(i -> isa(i, RandomUQInput), inputs) + random_names = names(random_inputs) + + # to_standard_normal_space!(random_inputs, samples) # maybe let user choose standardization + + θ = (; + mean = mean_params, + kernel = kernel_params, + noise = noise + ) + + # Turn DataFrame samples into arrays of correct size + X = Array(samples[:, random_names]) + Y = Array(samples[:, output]) + size(X, 2) == 1 ? X = dropdims(X; dims=2) : nothing # this is not safe for every case at the moment + + if isnothing(optimizer) + # If no optimizer is given we just conditionalize on output + gp = GP( + mean_f(ParameterHandling.value(θ.mean)), + kernel_f(ParameterHandling.value(θ.kernel)) + ) + fx = gp(X, ParameterHandling.value(θ.noise)[1]^2) # this should be possible to do in a better way... + gp = posterior(fx, Y) + else + # Use the passed optimizer to maximize marginal log likelihood + θ_opt, logml_ = maximize_logml(logml, θ, X, Y, mean_f, kernel_f; optimizer=optimizer) # should I return the logml? + gp = GP( + mean_f(ParameterHandling.value(θ_opt.mean)), + kernel_f(ParameterHandling.value(θ_opt.kernel)) + ) + fx = gp(X, ParameterHandling.value(θ_opt.noise)[1]^2) # this should be possible to do in a better way... + gp = posterior(fx, Y) + end + + # to_physical_space!(random_inputs, samples) + + # not sure if i need to return samples + # maybe return the log marginal likelihood + + return GaussianProcessRegressor(gp, random_inputs, output, samples) +end + +function gaussianprocess( + inputs::Symbol, + model::UQModel, + output::Symbol, + kernel::Function, + mean::Function=default_mean, + kernel_params::NamedTuple, # could be more specific than NamedTuple + mean_params::NamedTuple, + noise::NamedTuple, # how to do default value? (=positive(exp(-2.0))) + optimizer::Union{Optimizer, Nothing}=default_optimizer, + exp_design::ExperimentalDesign +) + return gaussianprocess( + [inputs], model, output, + kernel, mean, kernel_params, + mean_params, noise, optimizer, + exp_design + ) +end + +# what should this return? +function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at input + data = Matrix(df[:, names(gpr.input)])' + if !isnothing(gpr.input_normalizer) + μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_normalizer, data)) + else + μ, Σ = predict_y(gpr.gp, data) + end + + if !isnothing(grp.output_normalizer) + μ[:] = μ .* gpr.output_normalizer.scale[1] .+ gpr.output_normalizer.mean[1] + Σ[:] = Σ .* gpr.output_normalizer.scale[1]^2 + end + + df[!, Symbol(gpr.output, "_mean")] = μ + df[!, Symbol(gpr.output, "_var")] = Σ + return nothing +end + +# Not sure how to design a similar function for gps, or if this is even desirable +# function sample(pce::PolynomialChaosExpansion, n::Integer) +# samps = hcat(sample.(n, pce.Ψ.bases)...) 
+# out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) + +# samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) +# to_physical_space!(pce.input, samps) + +# samps[!, pce.output] = out +# return samps +# end + From bfd10d741fc62908be2aa3b1fe256b6138e4c043 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:48:36 +0200 Subject: [PATCH 018/117] Moved gaussianprocesses in subdirectory --- src/models/gaussianprocess.jl | 225 ---------------------------------- 1 file changed, 225 deletions(-) delete mode 100644 src/models/gaussianprocess.jl diff --git a/src/models/gaussianprocess.jl b/src/models/gaussianprocess.jl deleted file mode 100644 index 1b0be8d71..000000000 --- a/src/models/gaussianprocess.jl +++ /dev/null @@ -1,225 +0,0 @@ -""" - GaussianProcess(data::DataFrame, dependendVarName::Symbol, deg::Int, dim::Int) - -Creates a gaussian process prior .... - -# Examples -```jldoctest -julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); - -julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => true) -ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) -``` -""" -mutable struct GaussianProcessRegressor <: UQModel - gp::GPBase - input::Union{Vector{<:UQInput}, Vector{Symbol}} - output::Symbol - input_normalizer::Union{ZScoreTransform, Nothing} - output_normalizer::Union{ZScoreTransform, Nothing} -end - -function normalize!( - input::Union{Vector{<:Real}, Matrix{<:Real}}, - output::Vector{<:Real}, - normalize_input::Bool, - normalize_output::Bool, - log_noise::Real -) - if normalize_input - input_normalizer = fit(ZScoreTransform, input) - input[:] = StatsBase.transform(input_normalizer, input) - else - input_normalizer = nothing - end - - if normalize_output - output_normalizer = fit(ZScoreTransform, output) - output[:] = StatsBase.transform(output_normalizer, output) - log_noise -= log(output_normalizer.scale[1]) - else - output_normalizer = nothing - end - - return input_normalizer, output_normalizer, log_noise -end - -struct Optimizer - # maybe give one default and allow JuMP structs - method::Union{Optim.LBFGS, Optim.ConjugateGradient} # not sure how or even if to support multiple solvers (Matlab uses QuasiNewton default) - optim_options::Dict # maybe there is a better option than using dicts for this - hyperparams::Dict - bounds::Dict - # should I add number of optimizer runs? -end - -struct ExperimentalDesign # not sure about the name - sim::AbstractMonteCarlo # could also allow doe -end - -Optimizer() = Optimizer( - LBFGS(), - Dict(), - Dict(:domean => true, :kern => true, :noise => true, :lik => true), - Dict(:meanbounds => nothing, :kernbounds => nothing, :noisebounds => nothing, :likbounds => nothing) - ) - -function gaussianprocess( - df::DataFrame, - input::Vector{Symbol}, - output::Symbol, - kernel::Kernel, - mean::GaussianProcesses.Mean=MeanZero(), - log_noise::Real=-2.0, - optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_input::Bool=false, - normalize_output::Bool=false -) - x = copy(Matrix(df[:, input])') - y = df[:, output] - input_normalizer, output_normalizer, log_noise = normalize!(x, y, normalize_input, normalize_output, log_noise) - - gp = GP(x, y, mean, kernel, log_noise) - if !isnothing(optimizer) - optimize!(gp; - method=optimizer.method, - optimizer.hyperparams..., - optimizer.bounds..., - optimizer.optim_options... 
- ) - end - - gp = GaussianProcessRegressor( - gp, input, output, - input_normalizer, output_normalizer - ) - - return gp, df # this method does not really need to return df -end - -function gaussianprocess( - input::Vector{<:UQInput}, - model::Vector{<:UQModel}, - output::Symbol, - ed::ExperimentalDesign, - kernel::Kernel, - mean::GaussianProcesses.Mean=MeanZero(), - log_noise::Real=-2.0, - optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_output::Bool=false -) - samples = sample(input, ed.sim) - evaluate!(model, samples) - - random_input = filter(i -> isa(i, RandomUQInput), input) - random_names = names(random_input) - - to_standard_normal_space!(random_input, samples) # not sure if this is save to do in every case - x = copy(Matrix(samples[:, random_names])') - y = df[:, output] - _, output_normalizer, log_noise = normalize!(x, y, false, normalize_output, log_noise) # do not need input normalizer here - - gp = GP(x, y, mean, kernel, log_noise) - if !isnothing(optimizer) - optimize!(gp; - method=optimizer.method, - optimizer.hyperparams..., - optimizer.bounds..., - optimizer.optim_options... - ) - end - - gp = GaussianProcessRegressor( - gp, input, output, - _, output_normalizer - ) - to_physical_space!(random_input, samples) - - return gp, samples -end - -function gaussianprocess( - input::UQInput, - model::Vector{<:UQModel}, - output::Symbol, - ed::ExperimentalDesign, - kernel::Kernel, - mean::GaussianProcesses.Mean=MeanZero(), - log_noise::Real=-2.0, - optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_output::Bool=false -) - return gaussianprocess( - [input], model, output, - ed, kernel, mean, log_noise, - optimizer, normalize_output - ) -end - -function gaussianprocess( - input::Vector{<:UQInput}, - model::UQModel, - output::Symbol, - ed::ExperimentalDesign, - kernel::Kernel, - mean::GaussianProcesses.Mean=MeanZero(), - log_noise::Real=-2.0, - optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_output::Bool=false -) - return gaussianprocess( - input, [model], output, - ed, kernel, mean, log_noise, - optimizer, normalize_output - ) -end - -function gaussianprocess( - input::UQInput, - model::UQModel, - output::Symbol, - ed::ExperimentalDesign, - kernel::Kernel, - mean::GaussianProcesses.Mean=MeanZero(), - log_noise::Real=-2.0, - optimizer::Union{Optimizer, Nothing}=Optimizer(), - normalize_output::Bool=false -) - return gaussianprocess( - [input], [model], output, - ed, kernel, mean, log_noise, - optimizer, normalize_output - ) -end - -# what should this return? -function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at input - data = Matrix(df[:, names(gpr.input)])' - if !isnothing(gpr.input_normalizer) - μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_normalizer, data)) - else - μ, Σ = predict_y(gpr.gp, data) - end - - if !isnothing(grp.output_normalizer) - μ[:] = μ .* gpr.output_normalizer.scale[1] .+ gpr.output_normalizer.mean[1] - Σ[:] = Σ .* gpr.output_normalizer.scale[1]^2 - end - - df[!, Symbol(gpr.output, "_mean")] = μ - df[!, Symbol(gpr.output, "_var")] = Σ - return nothing -end - -# Not sure how to design a similar function for gps, or if this is even desirable -# function sample(pce::PolynomialChaosExpansion, n::Integer) -# samps = hcat(sample.(n, pce.Ψ.bases)...) 
-# out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) - -# samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) -# to_physical_space!(pce.input, samps) - -# samps[!, pce.output] = out -# return samps -# end - From abac8063eafb1edb9b85584ff5d5d0b02f1dd6cf Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:49:24 +0200 Subject: [PATCH 019/117] Demonstration of how to use current version of gps --- demo/metamodels/gp_tests.jl | 94 ++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 38 deletions(-) diff --git a/demo/metamodels/gp_tests.jl b/demo/metamodels/gp_tests.jl index df8faa712..727b24450 100644 --- a/demo/metamodels/gp_tests.jl +++ b/demo/metamodels/gp_tests.jl @@ -1,55 +1,73 @@ using UncertaintyQuantification -using GaussianProcesses # do we reexport for mean and kernel functions etc.? -using DataFrames +using ParameterHandling +using AbstractGPs using Random -using Statistics +using DataFrames -Random.seed!(20140430) -# Training data -n=10; #number of training points -x = 2π * rand(n); #predictors -y = sin.(x) + 0.05*randn(n) .+ 1e3; #regressors +using Zygote +using Optim +using Plots -#Select mean and covariance function -mZero = MeanZero() #Zero mean function -mConst = MeanConst(1.0) -kern = SE(0.0,0.0) #Sqaured exponential kernel (note that hyperparameters are on the log scale) +Random.seed!(20140430) -inputs = :x -output = :y -df = DataFrame(inputs => x) -df[!, output] = y +## Training data +n = 10 -logObsNoise = -1.0 # log standard deviation of observation noise (this is optional) +# For interface with random input and model +x = RandomVariable(Uniform(0, 2π), :x) +y = Model( + df -> + (sin.(df.x) + 0.05*randn(length(df.x))), + :y, +) +exp_design = ExperimentalDesign(MonteCarlo(n)) -gp, = gaussianprocess(df, [inputs], output, kern, mZero, logObsNoise) +# For interface with DataFrame +df = sample(x, n) +evaluate!(y, df) -# gp = GP(x,y,mConst,kern,logObsNoise) #Fit the GP -# gp_scaled = GP(x,y_scaled,mZero,kern,logObsNoise-log(std(y))) +## Set up mean, kernel and noise +# mean +mean_params = (;) +mZero(θ) = ZeroMean() #Zero mean function -# gp = GP(x,y,mConst,kern) #Fit the GP -# gp_scaled = GP(x,y_scaled,mZero,kern, -2-log(std(y))) +# kernel +kernel_params = (; + σ = positive(.9), + ℓ = positive(.9) +) +kern(θ) = θ.σ^2 * with_lengthscale(SqExponentialKernel(), θ.ℓ) #Squared exponential kernel (note that hyperparameters are on the log scale) -# μ, σ² = predict_y(gp,range(0,stop=2π,length=100)) -# a, b = predict_y(gp_scaled,range(0,stop=2π,length=100)) -# a_ = a .* std(y) .+ mean(y) -# b_ = b .* std(y) +# noise +noise_params = (;noise = fixed(exp(-2.))) -using Optim +θ = (;mean = mean_params, kernel = kernel_params, noise = noise_params) +flat_params, unflatten = value_flatten(θ) -optimize_hyperparams!(gp; method=ConjugateGradient(), noise=false) # Optimise the hyperparameters -# optimize!(gp_scaled; method=ConjugateGradient(), noise=false) +random_inputs = filter(i -> isa(i, RandomUQInput), [x]) +random_names = names(random_inputs) -# plot(gp; legend=false, fmt=:png) #Plot the GP after the hyperparameters have been optimised +gpr = gaussianprocess( + [x], + y, + :y, + kern, + mZero, + kernel_params, + mean_params, + noise_params, + exp_design, + LBFGS() +) -# optimize!(gp; kern = false) # Don't optimize kernel hyperparameters -# optimize!(gp; kernbounds = [[-1, -1], [1, 1]]) # Optimize the kernel parameters in a box with lower bounds [-1, -1] and upper bounds [1, 1] 
+y_gp = gpr.gp(sample(x, 10)[:, 1]) -# using Plots #Load Plots.jl package +function plotdata() + plot(; xlabel="x", ylabel="y", legend=:bottomright) + return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) +end -# scatter(x, y) -# plot!(range(0,stop=2π,length=100), μ, ribbon=σ²) -# plot!(range(0,stop=2π,length=100), a_, ribbon=b_) +plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) -# plot(gp; xlabel="x", ylabel="y", title="Gaussian process", legend=false, fmt=:png) # Plot the GP -# plot(gp_scaled; xlabel="x", ylabel="y", title="Gaussian process", legend=false, fmt=:png) +plotdata() +plot_gp!(gpr.gp; label="posterior f(⋅)") \ No newline at end of file From a3df9f725aef869ad49b4b28803cfce611ebebc0 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:59:03 +0200 Subject: [PATCH 020/117] Preliminary demo files --- demo/metamodels/dataframe_to_gp_input.jl | 73 ++++++++++++++++++++++++ demo/metamodels/gp_abstract_gps.jl | 61 ++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 demo/metamodels/dataframe_to_gp_input.jl create mode 100644 demo/metamodels/gp_abstract_gps.jl diff --git a/demo/metamodels/dataframe_to_gp_input.jl b/demo/metamodels/dataframe_to_gp_input.jl new file mode 100644 index 000000000..43cdb4cf3 --- /dev/null +++ b/demo/metamodels/dataframe_to_gp_input.jl @@ -0,0 +1,73 @@ +using DataFrames +using UncertaintyQuantification +using AbstractGPs + +l = Parameter(1.8, :l) # length +b = Parameter(0.12, :b) # width + +h = RandomVariable(Normal(0.24, 0.01), :h) # height + +μ, σ = distribution_parameters(10e9, 1.6e9, LogNormal) +E = RandomVariable(LogNormal(μ, σ), :E) # young's modulus + +μ, σ = distribution_parameters(5000, 400, LogNormal) +P = RandomVariable(LogNormal(μ, σ), :P) # tip load + +μ, σ = distribution_parameters(600, 140, LogNormal) +ρ = RandomVariable(LogNormal(μ, σ), :ρ) # density + +c = GaussianCopula([1 0.8; 0.8 1]) +jd = JointDistribution([E, ρ], c) + +inputs = [l, b, h, P, jd] + +inertia = Model(df -> df.b .* df.h .^ 3 / 12, :I) + +displacement = Model( + df -> + (df.ρ .* 9.81 .* df.b .* df.h .* df.l .^ 4) ./ (8 .* df.E .* df.I) .+ + (df.P .* df.l .^ 3) ./ (3 .* df.E .* df.I), + :w, +) + +df = UncertaintyQuantification.sample(inputs, 20) +evaluate!([inertia, displacement], df) + +random_inputs = filter(i -> isa(i, RandomUQInput), inputs) +random_names = names(random_inputs) +output = :w + +IN = InputNormalizer(df, random_names, true) +Y = Matrix(df[:, random_names]) +Y_N = IN(df) + +UQIN = UQInputNormalizer(random_inputs, true) +UQY = Matrix(df[:, random_names]) +UQY_N = UQIN(df) + +# Generate toy data. +num_dims_in = 3 +num_dims_out = 2 +num_obs = 100 +X = randn(num_obs, num_dims_in) +Y = randn(num_obs, num_dims_out) + +# Convert to format required for AbstractGPs / KernelFunctions. +# See docstrings for more info. This is basically a no-op. +x, y = prepare_isotopic_multi_output_data(RowVecs(X), RowVecs(Y)) + +# Construct multi-output model. +f = GP(LinearMixingModelKernel([SEKernel(), Matern52Kernel()], randn(2, num_dims_out))) + +# Do the usual things that you would do with a single-output GP. 
+fx = f(x, 0.5) +logpdf(fx, y) +y_from_prior = rand(fx) +fx_mean = mean(fx) + +f_post = posterior(fx, Y[:]) + +mean(f_post(x)) +f_post(x) + +y_shaped = reshape(y_from_prior, :, num_dims_out) \ No newline at end of file diff --git a/demo/metamodels/gp_abstract_gps.jl b/demo/metamodels/gp_abstract_gps.jl new file mode 100644 index 000000000..9d9af3121 --- /dev/null +++ b/demo/metamodels/gp_abstract_gps.jl @@ -0,0 +1,61 @@ +using UncertaintyQuantification +using ParameterHandling +using AbstractGPs +using Random +using DataFrames + +using Zygote +using Optim +using Plots + +Random.seed!(20140430) + +## Training data +n = 10 + +# For interface with random input and model +x = RandomVariable(Uniform(0, 2π), :x) +y = Model( + df -> + (sin.(df.x) + 0.05*randn(length(df.x))), + :y, +) +exp_design = ExperimentalDesign(MonteCarlo(n)) + +# For interface with DataFrame +df = sample(x, n) +evaluate!(y, df) + +## Set up mean, kernel and noise +# mean +mean_params = (;) +mZero(θ) = ZeroMean() #Zero mean function + +# kernel +kernel_params = (; + σ = positive(1.), + ℓ = positive(1.) +) +kern(θ) = θ.σ^2 * with_lengthscale(SqExponentialKernel(), θ.ℓ) #Squared exponential kernel (note that hyperparameters are on the log scale) + +# noise +noise_params = (;noise = fixed(exp(-5.))) + +X = df[:, 1] +Y = df[:, 2] + +gp_prior = GP(mZero(mean_params), kern(ParameterHandling.value(kernel_params))) +fx = gp_prior(X, ParameterHandling.value(noise_params)[1]^2) +gp_post = posterior(fx, Y) + +y_gp = gpr.gp(sample(x, 10)[:, 1]) + +function plotdata() + plot(; xlabel="x", ylabel="y", legend=:bottomright) + return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) +end + +plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) + +plotdata() +plot_gp!(gp_post; label="posterior f(⋅)") \ No newline at end of file From 1a5304db3dc9f54a206019758b86a235581cf536 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:59:44 +0200 Subject: [PATCH 021/117] Current state of gp implementation --- src/models/gp/datamanipulation.jl | 125 ++++++++++++++++++ src/models/gp/gaussianprocess.jl | 213 ++++++++++++++++-------------- 2 files changed, 242 insertions(+), 96 deletions(-) create mode 100644 src/models/gp/datamanipulation.jl diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl new file mode 100644 index 000000000..ecf47c985 --- /dev/null +++ b/src/models/gp/datamanipulation.jl @@ -0,0 +1,125 @@ +abstract type AbstractInputTransformer end +abstract type AbstractOutputTransformer end + +struct InputTransformer <: AbstractInputTransformer + transform::Union{ZScoreTransform, Nothing} # there is probably a better way to do this + input_names::Union{Symbol, Vector{<:Symbol}} + normalize::Bool +end + +# Construct from DataFrame +function InputTransformer( + df::DataFrame, + input_names::Union{Symbol, Vector{<:Symbol}}, + normalize::Bool +) + if normalize + X = df_to_array(df, input_names) + normalization = fit(ZScoreTransform, X; dims=1) + InputTransformer( + normalization, + input_names, + normalize + ) + else + InputTransformer( + nothing, + input_names, + normalize + ) + end +end + +function (transformer::InputTransformer)(df::DataFrame) + if !isnothing(transformer.input_transform) + X = df_to_array(df, transformer.input_names) + return StatsBase.transform(transformer.transform, X) + else + X = df_to_array(df, transformer.input_names) + return X + end +end + +struct UQInputTransformer 
<: AbstractInputTransformer + uqinputs::Union{UQInput, Vector{<:UQInput}} + normalize::Bool +end + +function (transformer::UQInputTransformer)(df::DataFrame) + if transformer.normalize + uqinput_names = names(transformer.uqinputs) + data = df[:, uqinput_names] + to_standard_normal_space!(transformer.uqinputs, data) + # X is a Matrix for multiple inputs, else it is a Vector + X = df_to_array(data, uqinput_names) + return X + else + uqinput_names = names(transformer.uqinputs) + # X is a Matrix for multiple inputs, else it is a Vector + X = df_to_array(df, uqinput_names) + return X + end +end + +struct Outputtransformer <: AbstractOutputTransformer + transform::Union{ZScoreTransform, Nothing} # there is probably a better way to do this + output_names::Union{Symbol, Vector{<:Symbol}} + normalize::Bool +end + +# Construct from DataFrame +function OutputTransformer( + df::DataFrame, + output_names::Union{Symbol, Vector{<:Symbol}}, + normalize::Bool +) + if normalize + Y = df_to_array(df, output_names) + normalization = fit(ZScoreTransform, Y; dims=1) + OutputTransformer( + normalization, + output_names, + normalize + ) + else + OutputTransformer( + nothing, + output_names, + normalize + ) + end +end + +function (transformer::OutputTransformer)(df::DataFrame) + if !isnothing(transformer.transform) + Y = df_to_array(df, transformer.output_names) + return StatsBase.transform(transformer.transform, Y) + else + Y = df_to_array(df, transformer.output_names) + return Y + end +end + +function inverse_transform(Y::Array, transformer::AbstractOutputTransformer) + if !isnothing(transformer.transform) + return StatsBase.reconstruct(transformer.transform, Y) + else + return Y + end +end + +function df_to_array( # That name sucks + df::DataFrame, + name::Union{Symbol, String} # do we use Strings? +) + return df[:, name] +end + +function df_to_array( # That name sucks + df::DataFrame, + names::Union{Vector{<:Symbol}, Vector{<:String}} # do we use Strings? +) + # check for the case where we want a single column but the name is given in a Vector + length(names) == 1 ? 
X = df_to_array(df, names[1]) : X = Matrix(df[:, names]) + return X +end \ No newline at end of file diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index a13ddb2d5..2c73c222f 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -11,104 +11,95 @@ julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => tr ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) ``` """ -struct GaussianProcessRegressor <: UQModel - gp::AbstractGPs.AbstractGP - input::Union{Vector{<:UQInput}, Vector{Symbol}} - output::Symbol - data::DataFrame -end - -default_optimizer = LBFGS() +# default_optimizer = LBFGS() NoiseTypes = Union{ ParameterHandling.Positive, ParameterHandling.Bounded, ParameterHandling.Fixed } -default_mean() = ZeroMean() - -function normalize!( - input::Union{Vector{<:Real}, Matrix{<:Real}}, - output::Vector{<:Real}, - normalize_input::Bool, - normalize_output::Bool, - log_noise::Real -) - if normalize_input - input_normalizer = fit(ZScoreTransform, input) - input[:] = StatsBase.transform(input_normalizer, input) - else - input_normalizer = nothing - end +# default_mean() = ZeroMean() - if normalize_output - output_normalizer = fit(ZScoreTransform, output) - output[:] = StatsBase.transform(output_normalizer, output) - log_noise -= log(output_normalizer.scale[1]) - else - output_normalizer = nothing - end - - return input_normalizer, output_normalizer, log_noise +struct GaussianProcess <: UQModel + gp::AbstractGPs.AbstractGP + input::Union{Vector{<:UQInput}, Vector{Symbol}} + output::Symbol + inp_transformer::AbstractInputTransformer # not sure if these should transform hyperparams as well + out_transformer::AbstractOutputTransformer end -struct ExperimentalDesign # not sure about the name - sim::AbstractMonteCarlo # could also allow doe +function GaussianProcess( + gp::AbstractGPs.AbstractGP, + input::Union{UQInput, Symbol}, + output::Symbol, + inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well + out_transformer::AbstractOutputTransformer, # leaving that for later +) + GaussianProcess(gp, [input], output, inp_transformer, out_transformer) end -function logml(θ, input, output, mean_f, kernel_f) - gp = GP( - mean_f(ParameterHandling.value(θ.mean)), - kernel_f(ParameterHandling.value(θ.kernel)) - ) - f = gp( - input, - ParameterHandling.value(θ.noise)[1]^2 # same as in gaussianprocess... - ) - return -logpdf(f, output) -end +# Custom meanfunctions will break Zygote autodiff for multidimensional inputs +# Create from DataFrame +function gaussianprocess( + data::DataFrame, + inputs::Vector{Symbol}, + output::Symbol, + mean_f::Function, # should provide a default mean + mean_params::NamedTuple, + kernel_f::Function, + kernel_params::NamedTuple, # could be more specific than NamedTuple + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
+ normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.AbstractOptimizer, Nothing}=nothing +) + inp_transformer = InputTransformer(data, inputs, normalize_inp) + out_transformer = OutputTransformer(data, output, normalize_out) -function maximize_logml(logml, θ, input, output, mean_f, kernel_f; optimizer, maxiter=1_000) - options = Optim.Options(; iterations=maxiter, show_trace=true) + θ = (; + mean = mean_params, + kernel = kernel_params, + noise = (;noise_params = noise) + ) - θ_flat, unflatten = ParameterHandling.value_flatten(θ) + # Turn DataFrame samples into X and Y arrays for GP + X = inp_transformer(data, inputs) + Y = out_transformer(data, output) - ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations - function fg!(F, G, x) - if F !== nothing && G !== nothing - val, grad = Zygote.withgradient( - x -> logml(unflatten(x), input, output, mean_f, kernel_f), - x - ) - G .= only(grad) - return val - elseif G !== nothing - grad = Zygote.gradient( - x -> logml(unflatten(x), input, output, mean_f, kernel_f), - x - ) - G .= only(grad) - return nothing - elseif F !== nothing - return logml(unflatten(x), input, output, mean_f, kernel_f) - end + if isnothing(optimizer) + # If no optimizer is given we just conditionalize on output + gp = GP( + mean_f(ParameterHandling.value(θ.mean)), + kernel_f(ParameterHandling.value(θ.kernel)) + ) + fx = gp(X, ParameterHandling.value(θ.noise)[:noise_params]^2) # this should be possible to do in a better way... + gp = posterior(fx, Y) + else + # Use the passed optimizer to maximize marginal log likelihood + θ_opt, logml_ = maximize_logml(logml, θ, X, Y, mean_f, kernel_f; optimizer=optimizer) # should I return the logml? + gp = GP( + mean_f(ParameterHandling.value(θ_opt.mean)), + kernel_f(ParameterHandling.value(θ_opt.kernel)) + ) + fx = gp(X, ParameterHandling.value(θ_opt.noise)[:noise_params]^2) # this should be possible to do in a better way... + gp = posterior(fx, Y) end - result = optimize(Optim.only_fg!(fg!), θ_flat, optimizer, options; inplace=false) - - return unflatten(result.minimizer), result + return GaussianProcess(gp, random_inputs, output, inp_transformer, out_transformer) end +# This creates a DataFrame and the calls the method above function gaussianprocess( inputs::Vector{<:UQInput}, model::UQModel, output::Symbol, - kernel_f::Function, mean_f::Function, # should provide a default mean - kernel_params::NamedTuple, # could be more specific than NamedTuple mean_params::NamedTuple, - noise::NamedTuple, # how to do default value? (=positive(exp(-2.0))) - exp_design::ExperimentalDesign, - optimizer::Union{Optim.AbstractOptimizer, Nothing}=default_optimizer + kernel_f::Function, + kernel_params::NamedTuple, # could be more specific than NamedTuple + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
+ normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.AbstractOptimizer, Nothing}=nothing ) samples = sample(inputs, exp_design.sim) # need to be able to pass experimental design evaluate!(model, samples) @@ -148,12 +139,7 @@ function gaussianprocess( gp = posterior(fx, Y) end - # to_physical_space!(random_inputs, samples) - - # not sure if i need to return samples - # maybe return the log marginal likelihood - - return GaussianProcessRegressor(gp, random_inputs, output, samples) + return GaussianProcess(gp, random_inputs, output, samples) end function gaussianprocess( @@ -177,17 +163,17 @@ function gaussianprocess( end # what should this return? -function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now gives mean and variance at input +function evaluate!(gpr::GaussianProcess, df::DataFrame) # this now gives mean and variance at input data = Matrix(df[:, names(gpr.input)])' - if !isnothing(gpr.input_normalizer) - μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_normalizer, data)) + if !isnothing(gpr.input_transformer) + μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_transformer, data)) else μ, Σ = predict_y(gpr.gp, data) end - if !isnothing(grp.output_normalizer) - μ[:] = μ .* gpr.output_normalizer.scale[1] .+ gpr.output_normalizer.mean[1] - Σ[:] = Σ .* gpr.output_normalizer.scale[1]^2 + if !isnothing(grp.output_transformer) + μ[:] = μ .* gpr.output_transformer.scale[1] .+ gpr.output_transformer.mean[1] + Σ[:] = Σ .* gpr.output_transformer.scale[1]^2 end df[!, Symbol(gpr.output, "_mean")] = μ @@ -195,15 +181,50 @@ function evaluate!(gpr::GaussianProcessRegressor, df::DataFrame) # this now give return nothing end -# Not sure how to design a similar function for gps, or if this is even desirable -# function sample(pce::PolynomialChaosExpansion, n::Integer) -# samps = hcat(sample.(n, pce.Ψ.bases)...) -# out = map(row -> dot(pce.y, evaluate(pce.Ψ, collect(row))), eachrow(samps)) +struct ExperimentalDesign # not sure about the name + sim::AbstractMonteCarlo # could also allow doe +end + +function logml(θ, input, output, mean_f, kernel_f) + gp = GP( + mean_f(ParameterHandling.value(θ.mean)), + kernel_f(ParameterHandling.value(θ.kernel)) + ) + f = gp( + input, + ParameterHandling.value(θ.noise)[:noise_params]^2 # same as in gaussianprocess... 
+ ) + return -logpdf(f, output) +end + +function maximize_logml(logml, θ, input, output, mean_f, kernel_f; optimizer, maxiter=1_000) + options = Optim.Options(; iterations=maxiter, show_trace=true) + + θ_flat, unflatten = ParameterHandling.value_flatten(θ) + + ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations + function fg!(F, G, x) + if F !== nothing && G !== nothing + val, grad = Zygote.withgradient( + x -> logml(unflatten(x), input, output, mean_f, kernel_f), + x + ) + G .= only(grad) + return val + elseif G !== nothing + grad = Zygote.gradient( + x -> logml(unflatten(x), input, output, mean_f, kernel_f), + x + ) + G .= only(grad) + return nothing + elseif F !== nothing + return logml(unflatten(x), input, output, mean_f, kernel_f) + end + end -# samps = DataFrame(map_from_bases(pce.Ψ, samps), names(pce.input)) -# to_physical_space!(pce.input, samps) + result = optimize(Optim.only_fg!(fg!), θ_flat, optimizer, options; inplace=false) -# samps[!, pce.output] = out -# return samps -# end + return unflatten(result.minimizer), result +end From a34afa9ae0565083ea60a594333f57984b676d2e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 12:35:32 +0200 Subject: [PATCH 022/117] Added packages AbstractGPs, ParameterHandling and Zygote --- Project.toml | 3 +++ src/UncertaintyQuantification.jl | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Project.toml b/Project.toml index c4e37b0d5..41d9fe3ed 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Jasper Behrensdorf ", "Ander Gray < version = "0.10.0" [deps] +AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" Bootstrap = "e28b5b4c-05e8-5b66-bc03-6f0c0a0a06e0" CovarianceEstimation = "587fd27a-f159-11e8-2dae-1979310e6154" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -21,12 +22,14 @@ MeshAdaptiveDirectSearch = "f4d74008-4565-11e9-04bd-4fe404e6a92a" Monomials = "272bfe72-f66c-432f-a94d-600f29493792" Mustache = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] Bootstrap = "2.2" diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 2c957dd58..4b4bbc850 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -1,5 +1,6 @@ module UncertaintyQuantification +using AbstractGPs using Bootstrap using CovarianceEstimation using DataFrames @@ -15,11 +16,13 @@ using MeshAdaptiveDirectSearch using Monomials using Mustache using Optim # needed for GPs to support other optimizers + settings +using ParameterHandling using Primes using QuasiMonteCarlo using Random using Reexport using StatsBase +using Zygote @reexport using Distributions From cc4fd8f4718649504ce433bce4c8c13b20bc50ae Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 15:04:56 +0200 Subject: [PATCH 023/117] added constructors for every input case --- src/models/gp/gaussianprocess.jl | 489 ++++++++++++++++++++++--------- 1 file changed, 358 insertions(+), 131 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 2c73c222f..e6a2dc28d 100644 
--- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -11,18 +11,12 @@ julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => tr ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) ``` """ -# default_optimizer = LBFGS() -NoiseTypes = Union{ - ParameterHandling.Positive, - ParameterHandling.Bounded, - ParameterHandling.Fixed - } -# default_mean() = ZeroMean() - struct GaussianProcess <: UQModel gp::AbstractGPs.AbstractGP input::Union{Vector{<:UQInput}, Vector{Symbol}} - output::Symbol + output::Vector{Symbol} + inp_dim::Int + out_dim::Int inp_transformer::AbstractInputTransformer # not sure if these should transform hyperparams as well out_transformer::AbstractOutputTransformer end @@ -31,195 +25,407 @@ function GaussianProcess( gp::AbstractGPs.AbstractGP, input::Union{UQInput, Symbol}, output::Symbol, + inp_dim::Int, + out_dim::Int, + inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well + out_transformer::AbstractOutputTransformer, # leaving that for later +) + GaussianProcess( + gp, [input], [output], + inp_dim, out_dim, + inp_transformer, out_transformer + ) +end + +function GaussianProcess( + gp::AbstractGPs.AbstractGP, + input::Union{Vector{<:UQInput}, Vector{Symbol}}, + output::Symbol, + inp_dim::Int, + out_dim::Int, + inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well + out_transformer::AbstractOutputTransformer, # leaving that for later +) + GaussianProcess( + gp, input, [output], + inp_dim, out_dim, + inp_transformer, out_transformer + ) +end + +function GaussianProcess( + gp::AbstractGPs.AbstractGP, + input::Union{UQInput, Symbol}, + output::Vector{Symbol}, + inp_dim::Int, + out_dim::Int, + inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well + out_transformer::AbstractOutputTransformer, # leaving that for later +) + GaussianProcess( + gp, [input], output, + inp_dim, out_dim, + inp_transformer, out_transformer + ) +end + +function GaussianProcess( + gp::AbstractGPs.AbstractGP, + input::Union{Vector{<:UQInput}, Vector{Symbol}}, + output::Symbol, inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well out_transformer::AbstractOutputTransformer, # leaving that for later ) - GaussianProcess(gp, [input], output, inp_transformer, out_transformer) + GaussianProcess( + gp, [input], [output], + 1, 1, + inp_transformer, out_transformer + ) end +""" """ +NoiseTypes = Union{ + ParameterHandling.Positive, + ParameterHandling.Bounded, + ParameterHandling.Fixed + } + # Custom meanfunctions will break Zygote autodiff for multidimensional inputs # Create from DataFrame -function gaussianprocess( +function GaussianProcess( + data::DataFrame, + inputs::Symbol, + outputs::Symbol, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
+ normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + (inp_dim, out_dim, + inp_transformer, out_transformer, + x, y) = _handle_gp_input( + data, inputs, outputs, + normalize_inp, normalize_out + ) + + θ = (; + mean_and_kernel = params, + noise = (;noise_params = noise) + ) + + gp = build_gp_posterior(build_gp, θ, x, y, optimizer) + + return GaussianProcess( + gp, inputs, outputs, + inp_dim, out_dim, + inp_transformer, out_transformer + ) +end + +function GaussianProcess( data::DataFrame, inputs::Vector{Symbol}, - output::Symbol, - mean_f::Function, # should provide a default mean - mean_params::NamedTuple, - kernel_f::Function, - kernel_params::NamedTuple, # could be more specific than NamedTuple + outputs::Vector{Symbol}, + build_gp::Function, + params::NamedTuple, noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... normalize_inp::Bool=false, normalize_out::Bool=false, - optimizer::Union{Optim.AbstractOptimizer, Nothing}=nothing + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing ) - inp_transformer = InputTransformer(data, inputs, normalize_inp) - out_transformer = OutputTransformer(data, output, normalize_out) + (inp_dim, out_dim, + inp_transformer, out_transformer, + x, y) = _handle_gp_input( + data, inputs, outputs, + normalize_inp, normalize_out + ) θ = (; - mean = mean_params, - kernel = kernel_params, + mean_and_kernel = params, noise = (;noise_params = noise) ) - # Turn DataFrame samples into X and Y arrays for GP - X = inp_transformer(data, inputs) - Y = out_transformer(data, output) + gp = build_gp_posterior(build_gp, θ, x, y, optimizer) - if isnothing(optimizer) - # If no optimizer is given we just conditionalize on output - gp = GP( - mean_f(ParameterHandling.value(θ.mean)), - kernel_f(ParameterHandling.value(θ.kernel)) - ) - fx = gp(X, ParameterHandling.value(θ.noise)[:noise_params]^2) # this should be possible to do in a better way... - gp = posterior(fx, Y) - else - # Use the passed optimizer to maximize marginal log likelihood - θ_opt, logml_ = maximize_logml(logml, θ, X, Y, mean_f, kernel_f; optimizer=optimizer) # should I return the logml? - gp = GP( - mean_f(ParameterHandling.value(θ_opt.mean)), - kernel_f(ParameterHandling.value(θ_opt.kernel)) - ) - fx = gp(X, ParameterHandling.value(θ_opt.noise)[:noise_params]^2) # this should be possible to do in a better way... - gp = posterior(fx, Y) - end + return GaussianProcess( + gp, inputs, outputs, + inp_dim, out_dim, + inp_transformer, out_transformer + ) +end - return GaussianProcess(gp, random_inputs, output, inp_transformer, out_transformer) +function GaussianProcess( + data::DataFrame, + inputs::Vector{Symbol}, + outputs::Symbol, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + data, inputs, [outputs], + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) end +function GaussianProcess( + data::DataFrame, + inputs::Symbol, + outputs::Vector{Symbol}, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
+ normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + data, [inputs], outputs, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end + +""" experimental design """ +struct ExperimentalDesign # not sure about the name + sim::AbstractMonteCarlo # could also allow doe +end + +""" GP from uqinput and model """ # This creates a DataFrame and the calls the method above -function gaussianprocess( - inputs::Vector{<:UQInput}, - model::UQModel, - output::Symbol, - mean_f::Function, # should provide a default mean - mean_params::NamedTuple, - kernel_f::Function, - kernel_params::NamedTuple, # could be more specific than NamedTuple +# need to treat this differently because univariate in- and output +function GaussianProcess( + inputs::UQInput, + model::Vector{<:UQModel}, + outputs::Symbol, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... normalize_inp::Bool=false, normalize_out::Bool=false, - optimizer::Union{Optim.AbstractOptimizer, Nothing}=nothing + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing ) - samples = sample(inputs, exp_design.sim) # need to be able to pass experimental design - evaluate!(model, samples) + data = sample(inputs, exp_design.sim) # need to be able to pass experimental design + evaluate!(model, data) - random_inputs = filter(i -> isa(i, RandomUQInput), inputs) - random_names = names(random_inputs) + return GaussianProcess( + data, inputs, outputs, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end - # to_standard_normal_space!(random_inputs, samples) # maybe let user choose standardization +# need to treat this differently because univariate in- and output +function GaussianProcess( + inputs::UQInput, + model::UQModel, + outputs::Symbol, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + inputs, [model], outputs, exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end - θ = (; - mean = mean_params, - kernel = kernel_params, - noise = noise - ) +# All these cases dispatch to the same method +function GaussianProcess( + inputs::Vector{<:UQInput}, + model::Vector{<:UQModel}, + outputs::Vector{Symbol}, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + data = sample(inputs, exp_design.sim) # need to be able to pass experimental design + evaluate!(model, data) - # Turn DataFrame samples into arrays of correct size - X = Array(samples[:, random_names]) - Y = Array(samples[:, output]) - size(X, 2) == 1 ? 
X = dropdims(X; dims=2) : nothing # this is not safe for every case at the moment + return GaussianProcess( + data, inputs, outputs, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end - if isnothing(optimizer) - # If no optimizer is given we just conditionalize on output - gp = GP( - mean_f(ParameterHandling.value(θ.mean)), - kernel_f(ParameterHandling.value(θ.kernel)) - ) - fx = gp(X, ParameterHandling.value(θ.noise)[1]^2) # this should be possible to do in a better way... - gp = posterior(fx, Y) - else - # Use the passed optimizer to maximize marginal log likelihood - θ_opt, logml_ = maximize_logml(logml, θ, X, Y, mean_f, kernel_f; optimizer=optimizer) # should I return the logml? - gp = GP( - mean_f(ParameterHandling.value(θ_opt.mean)), - kernel_f(ParameterHandling.value(θ_opt.kernel)) - ) - fx = gp(X, ParameterHandling.value(θ_opt.noise)[1]^2) # this should be possible to do in a better way... - gp = posterior(fx, Y) - end +function GaussianProcess( + inputs::Vector{<:UQInput}, + model::UQModel, + outputs::Vector{Symbol}, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + inputs, [model], outputs, exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end - return GaussianProcess(gp, random_inputs, output, samples) +function GaussianProcess( + inputs::Vector{<:UQInput}, + model::UQModel, + outputs::Symbol, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + inputs, [model], [outputs], exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) end -function gaussianprocess( - inputs::Symbol, +function GaussianProcess( + inputs::UQInput, + model::Vector{<:UQModel}, + outputs::Vector{Symbol}, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + [inputs], model, outputs, exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end + +function GaussianProcess( + inputs::UQInput, model::UQModel, - output::Symbol, - kernel::Function, - mean::Function=default_mean, - kernel_params::NamedTuple, # could be more specific than NamedTuple - mean_params::NamedTuple, - noise::NamedTuple, # how to do default value? (=positive(exp(-2.0))) - optimizer::Union{Optimizer, Nothing}=default_optimizer, - exp_design::ExperimentalDesign + outputs::Vector{Symbol}, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
+ normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing ) - return gaussianprocess( - [inputs], model, output, - kernel, mean, kernel_params, - mean_params, noise, optimizer, - exp_design + return GaussianProcess( + [inputs], [model], outputs, exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer + ) +end + +function GaussianProcess( + inputs::Vector{<:UQInput}, + model::Vector{<:UQModel}, + outputs::Symbol, + exp_design::ExperimentalDesign, + build_gp::Function, + params::NamedTuple, + noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... + normalize_inp::Bool=false, + normalize_out::Bool=false, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing +) + return GaussianProcess( + inputs, model, [outputs], exp_design, + build_gp, params, noise, + normalize_inp, normalize_out, + optimizer ) end # what should this return? -function evaluate!(gpr::GaussianProcess, df::DataFrame) # this now gives mean and variance at input - data = Matrix(df[:, names(gpr.input)])' - if !isnothing(gpr.input_transformer) - μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_transformer, data)) - else - μ, Σ = predict_y(gpr.gp, data) +function evaluate!(gp::GaussianProcess, df::DataFrame) # this now gives mean and variance at input + if gp.inp_dim != 1 || gp.out_dim != 1 # at the moment we only support out_dim = 1 + XX = MOInput(RowVecs(X), gp.out_dim) end - if !isnothing(grp.output_transformer) - μ[:] = μ .* gpr.output_transformer.scale[1] .+ gpr.output_transformer.mean[1] - Σ[:] = Σ .* gpr.output_transformer.scale[1]^2 - end + # data = Matrix(df[:, names(gpr.input)])' + # if !isnothing(gpr.input_transformer) + # μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_transformer, data)) + # else + # μ, Σ = predict_y(gpr.gp, data) + # end - df[!, Symbol(gpr.output, "_mean")] = μ - df[!, Symbol(gpr.output, "_var")] = Σ - return nothing -end + # if !isnothing(grp.output_transformer) + # μ[:] = μ .* gpr.output_transformer.scale[1] .+ gpr.output_transformer.mean[1] + # Σ[:] = Σ .* gpr.output_transformer.scale[1]^2 + # end -struct ExperimentalDesign # not sure about the name - sim::AbstractMonteCarlo # could also allow doe + # df[!, Symbol(gpr.output, "_mean")] = μ + # df[!, Symbol(gpr.output, "_var")] = Σ + # return nothing end -function logml(θ, input, output, mean_f, kernel_f) - gp = GP( - mean_f(ParameterHandling.value(θ.mean)), - kernel_f(ParameterHandling.value(θ.kernel)) - ) +""" maximize_logml(logml, θ, x, y, build_gp; optimizer=optimizer) """ +function logml(θ, x, y, build_gp) + gp = build_gp(ParameterHandling.value(θ.mean_and_kernel)) f = gp( - input, - ParameterHandling.value(θ.noise)[:noise_params]^2 # same as in gaussianprocess... + x, + only(ParameterHandling.value(θ.noise))^2 # same as in GaussianProcess... 
) - return -logpdf(f, output) + return -logpdf(f, y) end -function maximize_logml(logml, θ, input, output, mean_f, kernel_f; optimizer, maxiter=1_000) +function maximize_logml(logml, θ, x, y, build_gp; optimizer, maxiter=1_000) options = Optim.Options(; iterations=maxiter, show_trace=true) θ_flat, unflatten = ParameterHandling.value_flatten(θ) ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations - function fg!(F, G, x) + function fg!(F, G, θᵢ) if F !== nothing && G !== nothing val, grad = Zygote.withgradient( - x -> logml(unflatten(x), input, output, mean_f, kernel_f), - x + θᵢ -> logml(unflatten(θᵢ), x, y, build_gp), + θᵢ ) G .= only(grad) return val elseif G !== nothing grad = Zygote.gradient( - x -> logml(unflatten(x), input, output, mean_f, kernel_f), - x + θᵢ -> logml(unflatten(θᵢ), x, y, build_gp), + θᵢ ) G .= only(grad) return nothing elseif F !== nothing - return logml(unflatten(x), input, output, mean_f, kernel_f) + return logml(unflatten(θᵢ), x, y, build_gp) end end @@ -228,3 +434,24 @@ function maximize_logml(logml, θ, input, output, mean_f, kernel_f; optimizer, m return unflatten(result.minimizer), result end +function build_gp_posterior( + build_gp::Function, + θ::NamedTuple, + x::AbstractArray{<:Real}, + y::AbstractArray{<:Real}, + optimizer::Union{Optim.FirstOrderOptimizer, Nothing} +) + if isnothing(optimizer) + # If no optimizer is given we just conditionalize on output + gp = build_gp(ParameterHandling.value(θ.mean_and_kernel)) + fx = gp(x, only(ParameterHandling.value(θ.noise))^2) # this should be possible to do in a better way... + gp = posterior(fx, y) + else + # Use the passed optimizer to maximize marginal log likelihood + θ_opt, logml_ = maximize_logml(logml, θ, x, y, build_gp; optimizer=optimizer) # should I return the logml? + gp = build_gp(ParameterHandling.value(θ_opt.mean_and_kernel)) + fx = gp(x, only(ParameterHandling.value(θ_opt.noise))^2) # this should be possible to do in a better way... 
+ gp = posterior(fx, y) + end + return gp +end From 57625a820e4fe68f7608de578e351ccee97ac944 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 15:05:32 +0200 Subject: [PATCH 024/117] added function _handle_gp_input for AbstractGPs interface --- src/models/gp/datamanipulation.jl | 99 ++++++++++++++++++++++++++----- 1 file changed, 85 insertions(+), 14 deletions(-) diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl index ecf47c985..cd42b0ab3 100644 --- a/src/models/gp/datamanipulation.jl +++ b/src/models/gp/datamanipulation.jl @@ -14,7 +14,7 @@ function InputTransformer( normalize::Bool ) if normalize - X = df_to_array(df, input_names) + X = _dataframe_to_array(df, input_names) normalization = fit(ZScoreTransform, X; dims=1) InputTransformer( normalization, @@ -31,11 +31,11 @@ function InputTransformer( end function (transformer::InputTransformer)(df::DataFrame) - if !isnothing(transformer.input_transform) - X = df_to_array(df, transformer.input_names) + if !isnothing(transformer.transform) + X = _dataframe_to_array(df, transformer.input_names) return StatsBase.transform(transformer.transform, X) else - X = df_to_array(df, transformer.input_names) + X = _dataframe_to_array(df, transformer.input_names) return X end end @@ -51,17 +51,17 @@ function (transformer::UQInputTransformer)(df::DataFrame) data = df[:, uqinput_names] to_standard_normal_space!(transformer.uqinputs, data) # X is a Matrix for multiple inputs, else it is a Vector - X = df_to_array(data, uqinput_names) + X = _dataframe_to_array(data, uqinput_names) return X else uqinput_names = names(transformer.uqinputs) # X is a Matrix for multiple inputs, else it is a Vector - X = df_to_array(df, uqinput_names) + X = _dataframe_to_array(df, uqinput_names) return X end end -struct Outputtransformer <: AbstractOutputTransformer +struct OutputTransformer <: AbstractOutputTransformer transform::Union{ZScoreTransform, Nothing} # there is probably a better way to do this output_names::Union{Symbol, Vector{<:Symbol}} normalize::Bool @@ -74,7 +74,7 @@ function OutputTransformer( normalize::Bool ) if normalize - Y = df_to_array(df, output_names) + Y = _dataframe_to_array(df, output_names) normalization = fit(ZScoreTransform, Y; dims=1) OutputTransformer( normalization, @@ -90,12 +90,12 @@ function OutputTransformer( end end -function (transformer::OutputTransformer)(df::DataFrame) +function (transformer::AbstractOutputTransformer)(df::DataFrame) if !isnothing(transformer.transform) - Y = df_to_array(df, transformer.output_names) + Y = _dataframe_to_array(df, transformer.output_names) return StatsBase.transform(transformer.transform, Y) else - Y = df_to_array(df, transformer.output_names) + Y = _dataframe_to_array(df, transformer.output_names) return Y end end @@ -108,18 +108,89 @@ function inverse_transform(Y::Array, transformer::AbstractOutputTransformer) end end -function df_to_array( # That name sucks +function _dataframe_to_array( # That name sucks df::DataFrame, name::Union{Symbol, String} # do we use Strings? ) return df[:, name] end -function df_to_array( # That name sucks +function _dataframe_to_array( # That name sucks df::DataFrame, names::Union{Vector{<:Symbol}, Vector{<:String}} # do we use Strings? ) # check for the case where we want a single column but the name is given in a Vector - length(names) == 1 ? X = df_to_array(df, names[1]) : X = Matrix(df[:, names]) + length(names) == 1 ? 
X = _dataframe_to_array(df, names[1]) : X = Matrix(df[:, names]) return X +end + +function _handle_gp_input( + data::DataFrame, + input::Symbol, + output::Symbol, + normalize_inp::Bool=false, + normalize_out::Bool=false +) + inp_dim = 1 + out_dim = 1 + inp_transformer = InputTransformer(data, input, normalize_inp) + out_transformer = OutputTransformer(data, output, normalize_out) + + # Turn DataFrame samples into X and Y arrays for GP + x = inp_transformer(data) + y = out_transformer(data) + + return ( + inp_dim, out_dim, + inp_transformer, out_transformer, + x, y) +end + +function _handle_gp_input( + data::DataFrame, + inputs::Vector{Symbol}, + outputs::Vector{Symbol}, + normalize_inp::Bool=false, + normalize_out::Bool=false +) + inp_dim = length(inputs) + out_dim = length(outputs) + inp_transformer = InputTransformer(data, inputs, normalize_inp) + out_transformer = OutputTransformer(data, output, normalize_out) + + # Turn DataFrame samples into X and Y arrays for GP + X = inp_transformer(data) + Y = out_transformer(data) + x, y = prepare_isotopic_multi_output_data(RowVecs(X), RowVecs(Y)) + + return ( + inp_dim, out_dim, + inp_transformer, out_transformer, + x, y) +end + +function _handle_gp_input( + data::DataFrame, + inputs::Vector{Symbol}, + output::Symbol, + normalize_inp::Bool=false, + normalize_out::Bool=false +) + return _handle_gp_input( + data, inputs, [output], + normalize_inp, normalize_out + ) +end + +function _handle_gp_input( + data::DataFrame, + input::Symbol, + outputs::Vector{Symbol}, + normalize_inp::Bool=false, + normalize_out::Bool=false +) + return _handle_gp_input( + data, [input], outputs, + normalize_inp, normalize_out + ) end \ No newline at end of file From a24aa983cc2c6a436b6a5ec05d30b36fe40f3df7 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 15:30:38 +0200 Subject: [PATCH 025/117] added evaluate! method for gaussianprocesses --- src/models/gp/gaussianprocess.jl | 34 +++++++++++++++----------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index e6a2dc28d..c390db36a 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -370,27 +370,25 @@ function GaussianProcess( ) end -# what should this return? +# what should this calculate? Calculates mean for now function evaluate!(gp::GaussianProcess, df::DataFrame) # this now gives mean and variance at input - if gp.inp_dim != 1 || gp.out_dim != 1 # at the moment we only support out_dim = 1 - XX = MOInput(RowVecs(X), gp.out_dim) + x = gp.inp_transformer(df) + + if gp.inp_dim != 1 || gp.out_dim != 1 # here we have to reformat the input + x = MOInput(RowVecs(x), gp.out_dim) + end + + y = mean(gp.gp(x)) + + if gp.out_dim == 1 + y = inverse_transform(y, gp.out_transformer) + else + y = reshape(mean(gp.gp(x)), :, gp.out_dim) + y = inverse_transform(y, gp.out_transformer) end - # data = Matrix(df[:, names(gpr.input)])' - # if !isnothing(gpr.input_transformer) - # μ, Σ = predict_y(gpr.gp, StatsBase.transform!(grp.input_transformer, data)) - # else - # μ, Σ = predict_y(gpr.gp, data) - # end - - # if !isnothing(grp.output_transformer) - # μ[:] = μ .* gpr.output_transformer.scale[1] .+ gpr.output_transformer.mean[1] - # Σ[:] = Σ .* gpr.output_transformer.scale[1]^2 - # end - - # df[!, Symbol(gpr.output, "_mean")] = μ - # df[!, Symbol(gpr.output, "_var")] = Σ - # return nothing + insertcols!(df, (gp.output .=> eachcol(y))...) 
+ return nothing end """ maximize_logml(logml, θ, x, y, build_gp; optimizer=optimizer) """ From 9399cb955a4338b320168a44e8cbaa96da28764c Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 16:41:40 +0200 Subject: [PATCH 026/117] added a demo file for gps --- demo/metamodels/dataframe_to_gp_input.jl | 73 --------------------- demo/metamodels/gaussianprocess.jl | 82 ++++++++++++++++++++++++ demo/metamodels/gp_abstract_gps.jl | 61 ------------------ demo/metamodels/gp_tests.jl | 73 --------------------- 4 files changed, 82 insertions(+), 207 deletions(-) delete mode 100644 demo/metamodels/dataframe_to_gp_input.jl create mode 100644 demo/metamodels/gaussianprocess.jl delete mode 100644 demo/metamodels/gp_abstract_gps.jl delete mode 100644 demo/metamodels/gp_tests.jl diff --git a/demo/metamodels/dataframe_to_gp_input.jl b/demo/metamodels/dataframe_to_gp_input.jl deleted file mode 100644 index 43cdb4cf3..000000000 --- a/demo/metamodels/dataframe_to_gp_input.jl +++ /dev/null @@ -1,73 +0,0 @@ -using DataFrames -using UncertaintyQuantification -using AbstractGPs - -l = Parameter(1.8, :l) # length -b = Parameter(0.12, :b) # width - -h = RandomVariable(Normal(0.24, 0.01), :h) # height - -μ, σ = distribution_parameters(10e9, 1.6e9, LogNormal) -E = RandomVariable(LogNormal(μ, σ), :E) # young's modulus - -μ, σ = distribution_parameters(5000, 400, LogNormal) -P = RandomVariable(LogNormal(μ, σ), :P) # tip load - -μ, σ = distribution_parameters(600, 140, LogNormal) -ρ = RandomVariable(LogNormal(μ, σ), :ρ) # density - -c = GaussianCopula([1 0.8; 0.8 1]) -jd = JointDistribution([E, ρ], c) - -inputs = [l, b, h, P, jd] - -inertia = Model(df -> df.b .* df.h .^ 3 / 12, :I) - -displacement = Model( - df -> - (df.ρ .* 9.81 .* df.b .* df.h .* df.l .^ 4) ./ (8 .* df.E .* df.I) .+ - (df.P .* df.l .^ 3) ./ (3 .* df.E .* df.I), - :w, -) - -df = UncertaintyQuantification.sample(inputs, 20) -evaluate!([inertia, displacement], df) - -random_inputs = filter(i -> isa(i, RandomUQInput), inputs) -random_names = names(random_inputs) -output = :w - -IN = InputNormalizer(df, random_names, true) -Y = Matrix(df[:, random_names]) -Y_N = IN(df) - -UQIN = UQInputNormalizer(random_inputs, true) -UQY = Matrix(df[:, random_names]) -UQY_N = UQIN(df) - -# Generate toy data. -num_dims_in = 3 -num_dims_out = 2 -num_obs = 100 -X = randn(num_obs, num_dims_in) -Y = randn(num_obs, num_dims_out) - -# Convert to format required for AbstractGPs / KernelFunctions. -# See docstrings for more info. This is basically a no-op. -x, y = prepare_isotopic_multi_output_data(RowVecs(X), RowVecs(Y)) - -# Construct multi-output model. -f = GP(LinearMixingModelKernel([SEKernel(), Matern52Kernel()], randn(2, num_dims_out))) - -# Do the usual things that you would do with a single-output GP. 
-fx = f(x, 0.5) -logpdf(fx, y) -y_from_prior = rand(fx) -fx_mean = mean(fx) - -f_post = posterior(fx, Y[:]) - -mean(f_post(x)) -f_post(x) - -y_shaped = reshape(y_from_prior, :, num_dims_out) \ No newline at end of file diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl new file mode 100644 index 000000000..762d8dc7d --- /dev/null +++ b/demo/metamodels/gaussianprocess.jl @@ -0,0 +1,82 @@ +using UncertaintyQuantification, Plots +using AbstractGPs # not sure if to reexport +using ParameterHandling # not sure if to reexport +using Optim +using Random + +Random.seed!(20140430) +n = 10 +input_symbol = :x +output_symbol = :y + +# Input_symbol for passing a UQModel to GaussianProcess +x = RandomVariable(Uniform(0, 2π), input_symbol) +y = Model( + df -> + (sin.(df.x) + 0.05*randn(length(df.x))), + output_symbol, +) +exp_design = ExperimentalDesign(MonteCarlo(n)) + +# Input_symbol for passing a DataFrame to GaussianProcess +df = sample(x, n) +evaluate!(y, df) + +# Define how the GP is build +function build_gp(θ::NamedTuple) + k1 = (θ.SE.σ)^2 * with_lengthscale(SqExponentialKernel(), θ.SE.ℓ) + k2 = (θ.RQ.σ)^2 * with_lengthscale(RationalQuadraticKernel(; α=θ.RQ.α), θ.RQ.ℓ) + return GP(k1+k2) +end + +# gp parameters +θ = (; + SE = (; + σ = positive(1.), + ℓ = positive(1.) + ), + RQ = (; + σ = positive(1.), + ℓ = positive(1.), + α = positive(exp(-1.0)) + ) +) + +# noise +noise = fixed(exp(-2.)) + +# Fit GP to data from experimental design +gp_from_model = GaussianProcess( + x, + y, + :y, + exp_design, + build_gp, + θ, + noise +) + +# Optimize hyperparameters (There should be a method that allows to do this on a already fitted gp instance) +opt_gp_from_model = GaussianProcess( + x, + y, + :y, + exp_design, + build_gp, + θ, + noise, + false, + false, + LBFGS() +) + +function plotdata() + plot(; xlabel="x", ylabel="y", legend=:bottomright) + return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) +end + +plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) + +plotdata() +plot_gp!(gp_from_model.gp; label="posterior f(⋅)") +plot_gp!(opt_gp_from_model.gp; label="posterior f(⋅) optimized") \ No newline at end of file diff --git a/demo/metamodels/gp_abstract_gps.jl b/demo/metamodels/gp_abstract_gps.jl deleted file mode 100644 index 9d9af3121..000000000 --- a/demo/metamodels/gp_abstract_gps.jl +++ /dev/null @@ -1,61 +0,0 @@ -using UncertaintyQuantification -using ParameterHandling -using AbstractGPs -using Random -using DataFrames - -using Zygote -using Optim -using Plots - -Random.seed!(20140430) - -## Training data -n = 10 - -# For interface with random input and model -x = RandomVariable(Uniform(0, 2π), :x) -y = Model( - df -> - (sin.(df.x) + 0.05*randn(length(df.x))), - :y, -) -exp_design = ExperimentalDesign(MonteCarlo(n)) - -# For interface with DataFrame -df = sample(x, n) -evaluate!(y, df) - -## Set up mean, kernel and noise -# mean -mean_params = (;) -mZero(θ) = ZeroMean() #Zero mean function - -# kernel -kernel_params = (; - σ = positive(1.), - ℓ = positive(1.) 
-) -kern(θ) = θ.σ^2 * with_lengthscale(SqExponentialKernel(), θ.ℓ) #Squared exponential kernel (note that hyperparameters are on the log scale) - -# noise -noise_params = (;noise = fixed(exp(-5.))) - -X = df[:, 1] -Y = df[:, 2] - -gp_prior = GP(mZero(mean_params), kern(ParameterHandling.value(kernel_params))) -fx = gp_prior(X, ParameterHandling.value(noise_params)[1]^2) -gp_post = posterior(fx, Y) - -y_gp = gpr.gp(sample(x, 10)[:, 1]) - -function plotdata() - plot(; xlabel="x", ylabel="y", legend=:bottomright) - return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) -end - -plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) - -plotdata() -plot_gp!(gp_post; label="posterior f(⋅)") \ No newline at end of file diff --git a/demo/metamodels/gp_tests.jl b/demo/metamodels/gp_tests.jl deleted file mode 100644 index 727b24450..000000000 --- a/demo/metamodels/gp_tests.jl +++ /dev/null @@ -1,73 +0,0 @@ -using UncertaintyQuantification -using ParameterHandling -using AbstractGPs -using Random -using DataFrames - -using Zygote -using Optim -using Plots - -Random.seed!(20140430) - -## Training data -n = 10 - -# For interface with random input and model -x = RandomVariable(Uniform(0, 2π), :x) -y = Model( - df -> - (sin.(df.x) + 0.05*randn(length(df.x))), - :y, -) -exp_design = ExperimentalDesign(MonteCarlo(n)) - -# For interface with DataFrame -df = sample(x, n) -evaluate!(y, df) - -## Set up mean, kernel and noise -# mean -mean_params = (;) -mZero(θ) = ZeroMean() #Zero mean function - -# kernel -kernel_params = (; - σ = positive(.9), - ℓ = positive(.9) -) -kern(θ) = θ.σ^2 * with_lengthscale(SqExponentialKernel(), θ.ℓ) #Squared exponential kernel (note that hyperparameters are on the log scale) - -# noise -noise_params = (;noise = fixed(exp(-2.))) - -θ = (;mean = mean_params, kernel = kernel_params, noise = noise_params) -flat_params, unflatten = value_flatten(θ) - -random_inputs = filter(i -> isa(i, RandomUQInput), [x]) -random_names = names(random_inputs) - -gpr = gaussianprocess( - [x], - y, - :y, - kern, - mZero, - kernel_params, - mean_params, - noise_params, - exp_design, - LBFGS() -) - -y_gp = gpr.gp(sample(x, 10)[:, 1]) - -function plotdata() - plot(; xlabel="x", ylabel="y", legend=:bottomright) - return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) -end - -plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) - -plotdata() -plot_gp!(gpr.gp; label="posterior f(⋅)") \ No newline at end of file From 8abd2cb6deac40cae8c89706c4b8ff70d526aba7 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 16:42:37 +0200 Subject: [PATCH 027/117] added a convenience method to get the name of a single UQInput --- src/inputs/inputs.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/inputs/inputs.jl b/src/inputs/inputs.jl index cb50aa43f..0d710490c 100644 --- a/src/inputs/inputs.jl +++ b/src/inputs/inputs.jl @@ -37,6 +37,8 @@ function names(inputs::Vector{<:UQInput}) return _names end +names(input::UQInput) = only(names([input])) # need this to get the name of a single input in gps + function count_rvs(inputs::Vector{<:UQInput}) random_inputs = filter(i -> isa(i, RandomUQInput) || isa(i, ProbabilityBox), inputs) return mapreduce(dimensions, +, random_inputs) From dc014ea08de4324e4f1d4336ba3e72dad06821f6 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 16:43:12 +0200 Subject: [PATCH 028/117] current version that works for univariate in- and 
output --- src/models/gp/datamanipulation.jl | 22 +++++++++++---- src/models/gp/gaussianprocess.jl | 46 ++++++++++++++++--------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl index cd42b0ab3..b87e875a3 100644 --- a/src/models/gp/datamanipulation.jl +++ b/src/models/gp/datamanipulation.jl @@ -126,14 +126,19 @@ end function _handle_gp_input( data::DataFrame, - input::Symbol, + input::Union{Symbol, UQInput}, output::Symbol, normalize_inp::Bool=false, normalize_out::Bool=false ) inp_dim = 1 out_dim = 1 - inp_transformer = InputTransformer(data, input, normalize_inp) + + if isa(input, Symbol) + inp_transformer = InputTransformer(data, input, normalize_inp) + else + inp_transformer = UQInputTransformer(input, normalize_inp) + end out_transformer = OutputTransformer(data, output, normalize_out) # Turn DataFrame samples into X and Y arrays for GP @@ -148,14 +153,19 @@ end function _handle_gp_input( data::DataFrame, - inputs::Vector{Symbol}, + inputs::Union{Vector{Symbol}, Vector{UQInput}}, outputs::Vector{Symbol}, normalize_inp::Bool=false, normalize_out::Bool=false ) inp_dim = length(inputs) out_dim = length(outputs) - inp_transformer = InputTransformer(data, inputs, normalize_inp) + + if isa(input, Symbol) + inp_transformer = InputTransformer(data, input, normalize_inp) + else + inp_transformer = UQInputTransformer(input, normalize_inp) + end out_transformer = OutputTransformer(data, output, normalize_out) # Turn DataFrame samples into X and Y arrays for GP @@ -171,7 +181,7 @@ end function _handle_gp_input( data::DataFrame, - inputs::Vector{Symbol}, + inputs::Union{Vector{Symbol}, Vector{UQInput}}, output::Symbol, normalize_inp::Bool=false, normalize_out::Bool=false @@ -184,7 +194,7 @@ end function _handle_gp_input( data::DataFrame, - input::Symbol, + input::Union{Symbol, UQInput}, outputs::Vector{Symbol}, normalize_inp::Bool=false, normalize_out::Bool=false diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index c390db36a..e6d8e5155 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -13,7 +13,7 @@ ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutativ """ struct GaussianProcess <: UQModel gp::AbstractGPs.AbstractGP - input::Union{Vector{<:UQInput}, Vector{Symbol}} + input::Vector{Symbol} output::Vector{Symbol} inp_dim::Int out_dim::Int @@ -23,7 +23,7 @@ end function GaussianProcess( gp::AbstractGPs.AbstractGP, - input::Union{UQInput, Symbol}, + input::Symbol, output::Symbol, inp_dim::Int, out_dim::Int, @@ -39,7 +39,7 @@ end function GaussianProcess( gp::AbstractGPs.AbstractGP, - input::Union{Vector{<:UQInput}, Vector{Symbol}}, + input::Vector{Symbol}, output::Symbol, inp_dim::Int, out_dim::Int, @@ -55,7 +55,7 @@ end function GaussianProcess( gp::AbstractGPs.AbstractGP, - input::Union{UQInput, Symbol}, + input::Symbol, output::Vector{Symbol}, inp_dim::Int, out_dim::Int, @@ -69,20 +69,6 @@ function GaussianProcess( ) end -function GaussianProcess( - gp::AbstractGPs.AbstractGP, - input::Union{Vector{<:UQInput}, Vector{Symbol}}, - output::Symbol, - inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well - out_transformer::AbstractOutputTransformer, # leaving that for later -) - GaussianProcess( - gp, [input], [output], - 1, 1, - inp_transformer, out_transformer - ) -end - """ """ NoiseTypes = Union{ ParameterHandling.Positive, @@ -217,11 +203,27 @@ function 
GaussianProcess( data = sample(inputs, exp_design.sim) # need to be able to pass experimental design evaluate!(model, data) - return GaussianProcess( + # random_inputs = filter(i -> isa(i, RandomUQInput), inputs) + # random_input_names = names(random_inputs) + + (inp_dim, out_dim, + inp_transformer, out_transformer, + x, y) = _handle_gp_input( data, inputs, outputs, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer + normalize_inp, normalize_out + ) + + θ = (; + mean_and_kernel = params, + noise = (;noise_params = noise) + ) + + gp = build_gp_posterior(build_gp, θ, x, y, optimizer) + + return GaussianProcess( + gp, inputs.name, outputs, + inp_dim, out_dim, + inp_transformer, out_transformer ) end From d6e52d2910d07bfa44825ff45f54e09471a0684e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 16:43:40 +0200 Subject: [PATCH 029/117] added includes and exports for gps --- src/UncertaintyQuantification.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 4b4bbc850..98798c6af 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -75,6 +75,7 @@ export UQModel export AdvancedLineSampling export AdaptiveMetropolisHastings export EmpiricalDistribution +export ExperimentalDesign # Currently used for gps export BackwardFiniteDifferences export BoxBehnken export CentralComposite @@ -89,6 +90,7 @@ export ForwardFiniteDifferences export FractionalFactorial export FullFactorial export GaussianCopula +export GaussianProcess export GaussQuadrature export HaltonSampling export HermiteBasis @@ -168,6 +170,8 @@ include("models/imprecise/propagation.jl") include("models/polyharmonicspline.jl") include("models/responsesurface.jl") include("models//slicingmodel.jl") +include("models/gp/datamanipulation.jl") +include("models/gp/gaussianprocess.jl") include("hpc/slurm.jl") From 0dd38ed90c249efcfdbaf87e67051f252d2e3c23 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 19:08:24 +0200 Subject: [PATCH 030/117] using only instead of X[1] --- src/models/gp/datamanipulation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl index b87e875a3..ef5d947ef 100644 --- a/src/models/gp/datamanipulation.jl +++ b/src/models/gp/datamanipulation.jl @@ -120,7 +120,7 @@ function _dataframe_to_array( # That name sucks names::Union{Vector{<:Symbol}, Vector{<:String}} # do we use Strings? ) # check for the case where we want a single column but the name is given in a Vector - length(names) == 1 ? X = _dataframe_to_array(df, names[1]) : X = Matrix(df[:, names]) + length(names) == 1 ? 
X = _dataframe_to_array(df, only(names)) : X = Matrix(df[:, names]) return X end @@ -160,7 +160,7 @@ function _handle_gp_input( ) inp_dim = length(inputs) out_dim = length(outputs) - + if isa(input, Symbol) inp_transformer = InputTransformer(data, input, normalize_inp) else From 270a9ce4438b328bf8c6506dc2e94135c4e1b2c5 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 19:24:34 +0200 Subject: [PATCH 031/117] transform copy of DataFrame to handle 1D case --- src/models/gp/datamanipulation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl index ef5d947ef..15d65482a 100644 --- a/src/models/gp/datamanipulation.jl +++ b/src/models/gp/datamanipulation.jl @@ -48,7 +48,7 @@ end function (transformer::UQInputTransformer)(df::DataFrame) if transformer.normalize uqinput_names = names(transformer.uqinputs) - data = df[:, uqinput_names] + data = copy(df) to_standard_normal_space!(transformer.uqinputs, data) # X is a Matrix for multiple inputs, else it is a Vector X = _dataframe_to_array(data, uqinput_names) From 5f9a8cdd78ff2e66944cb13d6b8d23dde30896cb Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 27 Sep 2024 19:26:56 +0200 Subject: [PATCH 032/117] started adding unit tests --- test/models/gp/datamanipulation.jl | 85 ++++++++++++++++++++++++++++++ test/models/gp/gaussianprocess.jl | 0 2 files changed, 85 insertions(+) create mode 100644 test/models/gp/datamanipulation.jl create mode 100644 test/models/gp/gaussianprocess.jl diff --git a/test/models/gp/datamanipulation.jl b/test/models/gp/datamanipulation.jl new file mode 100644 index 000000000..3b18759d3 --- /dev/null +++ b/test/models/gp/datamanipulation.jl @@ -0,0 +1,85 @@ +@testset "GaussianProcessDataManipulation" begin + single_input = RandomVariable(Normal(-1, 0.5), :x1) + single_input_vector = [single_input] + multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) + + df_single = sample(single_input, 10) + df_single_vector = sample(single_input_vector, 10) + df_multi = sample(multi_input, 10) + + @testset "InputTransformer" begin + # Check 1D input + single_input_transformer_no = InputTransformer(df_single, names(single_input), false) + single_input_transformer_zsc = InputTransformer(df_single, names(single_input), true) + + @test all(single_input_transformer_no(df_single) .== df_single[:, 1]) + @test all( + single_input_transformer_zsc(df_single) .≈ + (df_single[:, 1] .- mean(df_single[:, 1])) / std(df_single[:, 1]) + ) + + # Check 1D input passed in a Vector + single_input_vector_transformer_no = InputTransformer( + df_single_vector, names(single_input_vector), false + ) + single_input_vector_transformer_zsc = InputTransformer( + df_single_vector, names(single_input_vector), true + ) + + @test all(single_input_vector_transformer_no(df_single_vector) .== df_single_vector[:, 1]) + @test all( + single_input_vector_transformer_zsc(df_single_vector) .≈ + (df_single_vector[:, 1] .- mean(df_single_vector[:, 1])) / std(df_single_vector[:, 1]) + ) + + # Check ND input + multi_input_transformer_no = InputTransformer(df_multi, names(multi_input), false) + multi_input_transformer_zsc = InputTransformer(df_multi, names(multi_input), true) + + df_as_matrix = Matrix(df_multi) + mean_ = mean(df_as_matrix; dims=1) + std_ = std(df_as_matrix; dims=1) + for (i, col) in enumerate(eachcol(df_as_matrix)) + df_as_matrix[:, i] .= (col .- mean_[1, i]) / std_[1, i] + end + + @test all(multi_input_transformer_no(df_multi) .== 
Matrix(df_multi)) + @test all(multi_input_transformer_zsc(df_multi) .≈ df_as_matrix) + end + + @testset "UQInputTransformer" begin + # Check 1D input + single_input_transformer_no = UQInputTransformer(single_input, false) + single_input_transformer_sns = UQInputTransformer(single_input, true) + + df_copy_sns = copy(df_single) + to_standard_normal_space!(single_input, df_copy_sns) + + @test all(single_input_transformer_no(df_single) .== df_single[:, 1]) + @test all(single_input_transformer_sns(df_single) .== df_copy_sns[:, 1]) + + # Check 1D input passed in a Vector + single_input_vector_transformer_no = UQInputTransformer(single_input_vector, false) + single_input_vector_transformer_sns = UQInputTransformer(single_input_vector, true) + + df_copy_sns = copy(df_single_vector) + to_standard_normal_space!(single_input_vector, df_copy_sns) + + @test all(single_input_transformer_no(df_single_vector) .== df_single_vector[:, 1]) + @test all(single_input_transformer_sns(df_single_vector) .== df_copy_sns[:, 1]) + + # Check ND input + multi_input_transformer_no = UQInputTransformer(multi_input, false) + multi_input_transformer_sns = UQInputTransformer(multi_input, true) + + df_copy_sns = copy(df_multi) + to_standard_normal_space!(multi_input, df_copy_sns) + + @test all(multi_input_transformer_no(df_multi) .== Matrix(df_multi)) + @test all(multi_input_transformer_sns(df_multi) .== Matrix(df_copy_sns)) + end + + @testset "OutputTransformer" begin + #: TODO + end +end diff --git a/test/models/gp/gaussianprocess.jl b/test/models/gp/gaussianprocess.jl new file mode 100644 index 000000000..e69de29bb From 895949d5f24c534bdb68dec74dd8186dee44f573 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 10 Feb 2025 20:22:15 +0100 Subject: [PATCH 033/117] Automatically extract parameters from GP model --- src/models/gp/parameterization.jl | 136 ++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 src/models/gp/parameterization.jl diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl new file mode 100644 index 000000000..26a82926b --- /dev/null +++ b/src/models/gp/parameterization.jl @@ -0,0 +1,136 @@ +struct Parameterized{T} + object::T +end + +function (p::Parameterized)(θ) + return apply_parameters(p.object, ParameterHandling.value(θ)) +end + +""" + parameterize(object) -> model, θ + +Turn `object` into a callable parameterized version of itself and a parameter `θ`. +After assigning `model, θ = parameterize(object)`, calling `model(θ)` will yield the same +`object` back. 
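# Editor's sketch (not part of the patch): a minimal round trip with the helpers in this
# file, using the GP prior from the demo later in this series (without the noise wrapper).
#
#   f = GP(0.0, SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]))
#   model, θ = parameterize(f)
#   model(θ)    # rebuilds an equivalent GP from the extracted parameters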
+""" +parameterize(object) = Parameterized(object), extract_parameters(object) + +# Mean functions +extract_parameters(::ZeroMean) = nothing +apply_parameters(m::ZeroMean, θ) = m + +extract_parameters(m::ConstMean) = m.c +apply_parameters(::ConstMean, θ) = ConstMean(θ) + +# Simple kernels +KernelsWithoutParameters = Union{SEKernel,Matern32Kernel,Matern52Kernel,WhiteKernel} + +extract_parameters(::T) where {T<:KernelsWithoutParameters} = nothing +apply_parameters(k::T, θ) where {T<:KernelsWithoutParameters} = k + +extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(only(k.r)) +apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=[θ]) + +extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(only(k.α)) +apply_parameters(k::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=θ, metric=k.metric) + +# Composite kernels +extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) +apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) + +extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels) +apply_parameters(k::KernelProduct, θ) = KernelProduct(map(apply_parameters, k.kernels, θ)) + +extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) +apply_parameters(k::TransformedKernel, θ) = TransformedKernel( + apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2]) + ) + +extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHandling.positive(only(k.σ²))) +apply_parameters(k::ScaledKernel, θ) = ScaledKernel(apply_parameters(k.kernel, θ[1]), θ[2]) + +# Transforms +extract_parameters(t::ScaleTransform) = ParameterHandling.positive(only(t.s)) +apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ) + +# # Likelihoods +# extract_parameters(::BernoulliLikelihood) = nothing +# apply_parameters(l::BernoulliLikelihood, θ) = l +# _isequal(l1::T, l2::T) where {T<:BernoulliLikelihood} = true + +# extract_parameters(::PoissonLikelihood) = nothing +# apply_parameters(l::PoissonLikelihood, θ) = l +# _isequal(l1::T, l2::T) where {T<:PoissonLikelihood} = true + +# GPs +extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) +apply_parameters(f::GP, θ) = GP( + apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]) + ) + +# extract_parameters(f::LatentGP) = (extract_parameters(f.f), extract_parameters(f.lik)) +# function apply_parameters(f::LatentGP, θ) +# return LatentGP(apply_parameters(f.f, θ[1]), apply_parameters(f.lik, θ[2]), f.Σy) +# end + +# # Approximations +# const SVA = SparseVariationalApproximation + +# function extract_parameters(sva::SVA, fixed_inducing_points::Bool) +# fz_par = fixed_inducing_points ? nothing : collect(sva.fz.x) +# q_par = extract_parameters(sva.q) +# return (fz_par, q_par) +# end + +# function apply_parameters(sva::SVA, θ) +# fz = isnothing(θ[1]) ? 
sva.fz : sva.fz.f(θ[1]) +# q = apply_parameters(sva.q, θ[2]) +# return SVA(fz, q) +# end + +# variational_gaussian(n::Int, T=Float64) = MvNormal(zeros(T, n), Matrix{T}(I, n, n)) + +# # Distributions +# extract_parameters(d::MvNormal) = (d.μ, ParameterHandling.positive_definite(d.Σ)) +# apply_parameters(::MvNormal, θ) = MvNormal(θ[1], θ[2]) +# _isequal(d1::MvNormal, d2::MvNormal) = isapprox(d1.μ, d1.μ) && isapprox(d1.Σ, d2.Σ) + +# Custom wrappers +struct NoisyGP{T<:GP,Tn<:Real} + gp::T + obs_noise::Tn +end + +(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) + +with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) + +extract_parameters(f::NoisyGP) = ( + extract_parameters(f.gp), + ParameterHandling.positive(f.obs_noise, exp, 1e-6) + ) +apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) + +# function _isequal(f1::NoisyGP, f2::NoisyGP) +# return _isequal(f1.gp, f2.gp) && isapprox(f1.obs_noise, f2.obs_noise) +# end + +# struct SVGP{T<:LatentGP,Ts<:SVA} +# lgp::T +# sva::Ts +# fixed_inducing_points::Bool +# end + +# SVGP(lgp, sva; fixed_inducing_points) = SVGP(lgp, sva, fixed_inducing_points) + +# function extract_parameters(f::SVGP) +# return (extract_parameters(f.lgp), extract_parameters(f.sva, f.fixed_inducing_points)) +# end + +# function apply_parameters(f::SVGP, θ) +# lgp = apply_parameters(f.lgp, θ[1]) +# sva = apply_parameters(f.sva, θ[2]) +# return SVGP(lgp, SVA(lgp(sva.fz.x).fx, sva.q), f.fixed_inducing_points) +# end + +# costfunction(svgp::SVGP, data) = -elbo(svgp.sva, svgp.lgp(data.x), data.y) \ No newline at end of file From 3ae037fdd378175c47c031e4cc1370848a99e80b Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 10 Feb 2025 20:23:25 +0100 Subject: [PATCH 034/117] Normalization of in- and outputs - TODO: Seperate placeholders and construction of normalizers --- src/models/gp/normalization.jl | 84 ++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 src/models/gp/normalization.jl diff --git a/src/models/gp/normalization.jl b/src/models/gp/normalization.jl new file mode 100644 index 000000000..f6bbbd1b0 --- /dev/null +++ b/src/models/gp/normalization.jl @@ -0,0 +1,84 @@ +abstract type AbstractInputTransform end +abstract type AbstractOutputTransform end + +### No tranform, default +struct NoInputTransform <: AbstractInputTransform end + +function (transform::NoInputTransform)(df::DataFrame, input_names::Union{Symbol, Vector{<:Symbol}}) + X = _dataframe_to_array(df, input_names) + return X +end + +### ZScore transform +struct InputTransform <: AbstractInputTransform + transform::StatsBase.ZScoreTransform + input_names::Union{Symbol, Vector{<:Symbol}} +end + +# Construct from DataFrame +function InputTransform( + df::DataFrame, + input_names::Union{Symbol, Vector{<:Symbol}} +) + X = _dataframe_to_array(df, input_names) + transform = fit(ZScoreTransform, X; dims=1) + return InputTransform(transform, input_names) +end + +function (transform::InputTransform)(df::DataFrame) + X = _dataframe_to_array(df, transform.input_names) + return StatsBase.transform(transform.transform, X) +end + +### Standard normal transform +struct UQInputTransform <: AbstractInputTransform + uqinputs::Union{UQInput, Vector{<:UQInput}} +end + +function (transform::UQInputTransform)(df::DataFrame) + df_copy = copy(df) + to_standard_normal_space!(names(transform.uqinputs), df_copy) + # X is a Matrix for multiple inputs, else it is a Vector + X = _dataframe_to_array(df_copy, uqinput_names) + return X +end + +### No Outputtransform, default +struct 
NoOutputTransform <: AbstractOutputTransform + # we do not support multioutput yet + output_names::Union{Symbol, Vector{<:Symbol}} # need to handle uqinput +end + +function (transform::NoOutputTransform)(df::DataFrame) + Y = _dataframe_to_array(df, transform.output_names) + return Y +end + +function inverse_transform(Y::Array, transform::NoOutputTransform) + return Y +end + +### ZScore output transform +struct OutputTransform <: AbstractOutputTransform + transform::StatsBase.ZScoreTransform # there is probably a better way to do this + output_names::Union{Symbol, Vector{<:Symbol}} +end + +# Construct from DataFrame +function OutputTransform( + df::DataFrame, + output_names::Union{Symbol, Vector{<:Symbol}} +) + Y = _dataframe_to_array(df, output_names) # will fail if Y is not an array + transform = fit(ZScoreTransform, Y; dims=1) + return OutputTransform(transform, output_names) +end + +function (transform::OutputTransform)(df::DataFrame) + Y = _dataframe_to_array(df, transform.output_names) + return StatsBase.transform(transform.transform, Y) +end + +function inverse_transform(Y::Array, transform::OutputTransform) + return StatsBase.reconstruct(transform.transform, Y) +end \ No newline at end of file From c0be0ff1fa225e2a03dd711c68237d6a1353023e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 10 Feb 2025 20:24:06 +0100 Subject: [PATCH 035/117] Cleaned up hyperparameter optimization --- src/models/gp/hyperparametertuning.jl | 48 +++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 src/models/gp/hyperparametertuning.jl diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl new file mode 100644 index 000000000..cf6ffda23 --- /dev/null +++ b/src/models/gp/hyperparametertuning.jl @@ -0,0 +1,48 @@ +abstract type AbstractHyperparameterOptimization end + +struct NoOptimization <: AbstractHyperparameterOptimization end + +function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, mle::NoOptimization) #!TYPES + # This is completely unnecessary, should maybe write a GP method for NoOpt + model, θ₀ = parameterize(gp) + flatparams, unflatten = ParameterHandling.flatten(θ₀) + return model(unflatten(flatparams)), unflatten(flatparams) +end + +struct MLE <: AbstractHyperparameterOptimization + optimizer::Optim.FirstOrderOptimizer + options::Optim.Options +end + +MLE() = MLE(Optim.LBFGS(), Optim.Options(; iterations=1000, show_trace=true)) + +objective(f::Union{AbstractGPs.GP, NoisyGP}, x, y, mle::MLE) = -logpdf(f(x), y) + +function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, mle::MLE) #!TYPES + model, θ₀ = parameterize(gp) + flatparams, unflatten = ParameterHandling.flatten(θ₀) + + ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations + function fg!(F, G, θ) + if F !== nothing && G !== nothing + val, grad = Zygote.withgradient( + θ -> objective(model(unflatten(θ)), x, y, mle), + θ + ) + G .= only(grad) + return val + elseif G !== nothing + grad = Zygote.gradient( + θ -> objective(model(unflatten(θ)), x, y, mle), + θ + ) + G .= only(grad) + return nothing + elseif F !== nothing + return objective(model(unflatten(θ)), x, y, mle) + end + end + + result = optimize(Optim.only_fg!(fg!), flatparams, mle.optimizer, mle.options; inplace=false) + return model(unflatten(result.minimizer)), unflatten(result.minimizer) +end \ No newline at end of file From 33b1782154a756cab292f5466eb234a9db31df8a Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 10 Feb 2025 20:24:42 
+0100 Subject: [PATCH 036/117] Started restructuring GaussianProcess constructors --- src/models/gp/gaussianprocess.jl | 358 +++++-------------------------- 1 file changed, 50 insertions(+), 308 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index e6d8e5155..27f01d500 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -12,108 +12,72 @@ ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutativ ``` """ struct GaussianProcess <: UQModel - gp::AbstractGPs.AbstractGP + gp::AbstractGPs.FiniteGP input::Vector{Symbol} - output::Vector{Symbol} - inp_dim::Int - out_dim::Int - inp_transformer::AbstractInputTransformer # not sure if these should transform hyperparams as well - out_transformer::AbstractOutputTransformer + output::Symbol + input_transform::AbstractInputTransform # not sure if these should transform hyperparams as well + output_transform::AbstractOutputTransform end -function GaussianProcess( - gp::AbstractGPs.AbstractGP, - input::Symbol, +""" ---------------------------------------------------------------------------------------------------- """ +# names = propertynames(data[:, Not(output)]) +## DATAFRAME INPUT OUTPUT STUFF +function GaussianProcess( + gp::AbstractGPs.GP, + data::DataFrame, output::Symbol, - inp_dim::Int, - out_dim::Int, - inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well - out_transformer::AbstractOutputTransformer, # leaving that for later -) - GaussianProcess( - gp, [input], [output], - inp_dim, out_dim, - inp_transformer, out_transformer - ) + input_transform::NoInputTransform, + output_transform::NoOutputTransform, + optimization::NoOptimization +) # should we use keyword args? + + return end -function GaussianProcess( - gp::AbstractGPs.AbstractGP, - input::Vector{Symbol}, +function GaussianProcess( + gp::AbstractGPs.GP, + data::DataFrame, output::Symbol, - inp_dim::Int, - out_dim::Int, - inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well - out_transformer::AbstractOutputTransformer, # leaving that for later -) - GaussianProcess( - gp, input, [output], - inp_dim, out_dim, - inp_transformer, out_transformer - ) +) # should we use keyword args? + return GaussianProcess( + gp, + data, + output, + NoInputTransform(), + NoOutputTransform(), + NoOptimization() + ) end -function GaussianProcess( - gp::AbstractGPs.AbstractGP, - input::Symbol, - output::Vector{Symbol}, - inp_dim::Int, - out_dim::Int, - inp_transformer::AbstractInputTransformer, # not sure if these should transform hyperparams as well - out_transformer::AbstractOutputTransformer, # leaving that for later -) - GaussianProcess( - gp, [input], output, - inp_dim, out_dim, - inp_transformer, out_transformer - ) -end +function GaussianProcess( + gp::AbstractGPs.GP, + data::DataFrame, + output::Symbol, + input_transform::NoInputTransform, + output_transform::NoOutputTransform, + optimization::AbstractHyperparameterOptimization +) # should we use keyword args? 
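# Editor's note (not part of the patch): this commit starts replacing the keyword-style
# constructor arguments (build_gp, params, noise, normalize flags, optimizer) with small
# argument types; later commits settle on StandardizeInput/StandardizeOutput (PATCH 038)
# and NoOptimization/MLE (PATCH 035), so that dispatch selects preprocessing and tuning.
# Intended call shape once the series is complete (see PATCH 044 below):
#
#   GaussianProcess(gp, data, :y, StandardizeInput(), StandardizeOutput(), MLE())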
-""" """ -NoiseTypes = Union{ - ParameterHandling.Positive, - ParameterHandling.Bounded, - ParameterHandling.Fixed - } + return +end -# Custom meanfunctions will break Zygote autodiff for multidimensional inputs -# Create from DataFrame function GaussianProcess( + gp::AbstractGPs.GP, data::DataFrame, - inputs::Symbol, - outputs::Symbol, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing + output::Symbol; + input_transform::AbstractInputTransformer, + output_transform::AbstractOutputTransformer, + optimization::AbstractHyperparameterOptimization ) - (inp_dim, out_dim, - inp_transformer, out_transformer, - x, y) = _handle_gp_input( - data, inputs, outputs, - normalize_inp, normalize_out - ) - - θ = (; - mean_and_kernel = params, - noise = (;noise_params = noise) - ) - - gp = build_gp_posterior(build_gp, θ, x, y, optimizer) - - return GaussianProcess( - gp, inputs, outputs, - inp_dim, out_dim, - inp_transformer, out_transformer - ) + return end +# Custom meanfunctions will break Zygote autodiff for multidimensional inputs +# Create from DataFrame function GaussianProcess( data::DataFrame, - inputs::Vector{Symbol}, - outputs::Vector{Symbol}, + inputs::Symbol, + outputs::Symbol, build_gp::Function, params::NamedTuple, noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... @@ -142,44 +106,8 @@ function GaussianProcess( ) end -function GaussianProcess( - data::DataFrame, - inputs::Vector{Symbol}, - outputs::Symbol, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - data, inputs, [outputs], - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - data::DataFrame, - inputs::Symbol, - outputs::Vector{Symbol}, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - data, [inputs], outputs, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - +""" ---------------------------------------------------------------------------------------------------- """ +## UQMODEL STUFF """ experimental design """ struct ExperimentalDesign # not sure about the name sim::AbstractMonteCarlo # could also allow doe @@ -227,151 +155,6 @@ function GaussianProcess( ) end -# need to treat this differently because univariate in- and output -function GaussianProcess( - inputs::UQInput, - model::UQModel, - outputs::Symbol, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
- normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - inputs, [model], outputs, exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -# All these cases dispatch to the same method -function GaussianProcess( - inputs::Vector{<:UQInput}, - model::Vector{<:UQModel}, - outputs::Vector{Symbol}, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - data = sample(inputs, exp_design.sim) # need to be able to pass experimental design - evaluate!(model, data) - - return GaussianProcess( - data, inputs, outputs, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - inputs::Vector{<:UQInput}, - model::UQModel, - outputs::Vector{Symbol}, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - inputs, [model], outputs, exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - inputs::Vector{<:UQInput}, - model::UQModel, - outputs::Symbol, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - inputs, [model], [outputs], exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - inputs::UQInput, - model::Vector{<:UQModel}, - outputs::Vector{Symbol}, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - [inputs], model, outputs, exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - inputs::UQInput, - model::UQModel, - outputs::Vector{Symbol}, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... - normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - [inputs], [model], outputs, exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - -function GaussianProcess( - inputs::Vector{<:UQInput}, - model::Vector{<:UQModel}, - outputs::Symbol, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
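# Editor's sketch (not part of the patch): the UQInput route wraps sampling and model
# evaluation; with the constructors added later in this series it reads like the demo in
# PATCH 039 (the names x, himmelblau and gp are taken from that demo):
#
#   x = RandomVariable.(Uniform(-5, 5), [:x1, :x2])
#   gpr = GaussianProcess(gp, x, himmelblau, :y, FullFactorial([8, 8]), StandardizeOutput(), MLE())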
- normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - return GaussianProcess( - inputs, model, [outputs], exp_design, - build_gp, params, noise, - normalize_inp, normalize_out, - optimizer - ) -end - # what should this calculate? Calculates mean for now function evaluate!(gp::GaussianProcess, df::DataFrame) # this now gives mean and variance at input x = gp.inp_transformer(df) @@ -393,47 +176,6 @@ function evaluate!(gp::GaussianProcess, df::DataFrame) # this now gives mean and return nothing end -""" maximize_logml(logml, θ, x, y, build_gp; optimizer=optimizer) """ -function logml(θ, x, y, build_gp) - gp = build_gp(ParameterHandling.value(θ.mean_and_kernel)) - f = gp( - x, - only(ParameterHandling.value(θ.noise))^2 # same as in GaussianProcess... - ) - return -logpdf(f, y) -end - -function maximize_logml(logml, θ, x, y, build_gp; optimizer, maxiter=1_000) - options = Optim.Options(; iterations=maxiter, show_trace=true) - - θ_flat, unflatten = ParameterHandling.value_flatten(θ) - - ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations - function fg!(F, G, θᵢ) - if F !== nothing && G !== nothing - val, grad = Zygote.withgradient( - θᵢ -> logml(unflatten(θᵢ), x, y, build_gp), - θᵢ - ) - G .= only(grad) - return val - elseif G !== nothing - grad = Zygote.gradient( - θᵢ -> logml(unflatten(θᵢ), x, y, build_gp), - θᵢ - ) - G .= only(grad) - return nothing - elseif F !== nothing - return logml(unflatten(θᵢ), x, y, build_gp) - end - end - - result = optimize(Optim.only_fg!(fg!), θ_flat, optimizer, options; inplace=false) - - return unflatten(result.minimizer), result -end - function build_gp_posterior( build_gp::Function, θ::NamedTuple, From fb9e15a80d1f6551f56760461339d3d623542e75 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 10 Feb 2025 20:25:16 +0100 Subject: [PATCH 037/117] Moved normalization to other file --- src/models/gp/datamanipulation.jl | 151 ------------------------------ 1 file changed, 151 deletions(-) diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl index 15d65482a..e89158eef 100644 --- a/src/models/gp/datamanipulation.jl +++ b/src/models/gp/datamanipulation.jl @@ -1,113 +1,3 @@ -abstract type AbstractInputTransformer end -abstract type AbstractOutputTransformer end - -struct InputTransformer <: AbstractInputTransformer - transform::Union{ZScoreTransform, Nothing} # there is probably a better way to do this - input_names::Union{Symbol, Vector{<:Symbol}} - normalize::Bool -end - -# Construct from DataFrame -function InputTransformer( - df::DataFrame, - input_names::Union{Symbol, Vector{<:Symbol}}, - normalize::Bool -) - if normalize - X = _dataframe_to_array(df, input_names) - normalization = fit(ZScoreTransform, X; dims=1) - InputTransformer( - normalization, - input_names, - normalize - ) - else - InputTransformer( - nothing, - input_names, - normalize - ) - end -end - -function (transformer::InputTransformer)(df::DataFrame) - if !isnothing(transformer.transform) - X = _dataframe_to_array(df, transformer.input_names) - return StatsBase.transform(transformer.transform, X) - else - X = _dataframe_to_array(df, transformer.input_names) - return X - end -end - -struct UQInputTransformer <: AbstractInputTransformer - uqinputs::Union{UQInput, Vector{<:UQInput}} - normalize::Bool -end - -function (transformer::UQInputTransformer)(df::DataFrame) - if transformer.normalize - uqinput_names = names(transformer.uqinputs) - data = copy(df) - 
to_standard_normal_space!(transformer.uqinputs, data) - # X is a Matrix for multiple inputs, else it is a Vector - X = _dataframe_to_array(data, uqinput_names) - return X - else - uqinput_names = names(transformer.uqinputs) - # X is a Matrix for multiple inputs, else it is a Vector - X = _dataframe_to_array(df, uqinput_names) - return X - end -end - -struct OutputTransformer <: AbstractOutputTransformer - transform::Union{ZScoreTransform, Nothing} # there is probably a better way to do this - output_names::Union{Symbol, Vector{<:Symbol}} - normalize::Bool -end - -# Construct from DataFrame -function OutputTransformer( - df::DataFrame, - output_names::Union{Symbol, Vector{<:Symbol}}, - normalize::Bool -) - if normalize - Y = _dataframe_to_array(df, output_names) - normalization = fit(ZScoreTransform, Y; dims=1) - OutputTransformer( - normalization, - output_names, - normalize - ) - else - OutputTransformer( - nothing, - output_names, - normalize - ) - end -end - -function (transformer::AbstractOutputTransformer)(df::DataFrame) - if !isnothing(transformer.transform) - Y = _dataframe_to_array(df, transformer.output_names) - return StatsBase.transform(transformer.transform, Y) - else - Y = _dataframe_to_array(df, transformer.output_names) - return Y - end -end - -function inverse_transform(Y::Array, transformer::AbstractOutputTransformer) - if !isnothing(transformer.transform) - return StatsBase.reconstruct(transformer.transform, Y) - else - return Y - end -end - function _dataframe_to_array( # That name sucks df::DataFrame, name::Union{Symbol, String} # do we use Strings? @@ -151,34 +41,6 @@ function _handle_gp_input( x, y) end -function _handle_gp_input( - data::DataFrame, - inputs::Union{Vector{Symbol}, Vector{UQInput}}, - outputs::Vector{Symbol}, - normalize_inp::Bool=false, - normalize_out::Bool=false -) - inp_dim = length(inputs) - out_dim = length(outputs) - - if isa(input, Symbol) - inp_transformer = InputTransformer(data, input, normalize_inp) - else - inp_transformer = UQInputTransformer(input, normalize_inp) - end - out_transformer = OutputTransformer(data, output, normalize_out) - - # Turn DataFrame samples into X and Y arrays for GP - X = inp_transformer(data) - Y = out_transformer(data) - x, y = prepare_isotopic_multi_output_data(RowVecs(X), RowVecs(Y)) - - return ( - inp_dim, out_dim, - inp_transformer, out_transformer, - x, y) -end - function _handle_gp_input( data::DataFrame, inputs::Union{Vector{Symbol}, Vector{UQInput}}, @@ -190,17 +52,4 @@ function _handle_gp_input( data, inputs, [output], normalize_inp, normalize_out ) -end - -function _handle_gp_input( - data::DataFrame, - input::Union{Symbol, UQInput}, - outputs::Vector{Symbol}, - normalize_inp::Bool=false, - normalize_out::Bool=false -) - return _handle_gp_input( - data, [input], outputs, - normalize_inp, normalize_out - ) end \ No newline at end of file From db0202d6540485ba2bf072e16c3bfa7b91dd3627 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:08:21 +0100 Subject: [PATCH 038/117] Store in- and output standardization in single struct --- src/models/gp/standardization.jl | 139 +++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 src/models/gp/standardization.jl diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl new file mode 100644 index 000000000..4dd59984f --- /dev/null +++ b/src/models/gp/standardization.jl @@ -0,0 +1,139 @@ +struct StandardizeInput + flag::Bool +end + +StandardizeInput() = StandardizeInput(true) + +struct 
StandardizeOutput + flag::Bool +end + +StandardizeOutput() = StandardizeOutput(true) + +# DataTransform +abstract type AbstractInputTransform end +abstract type AbstractOutputTransform end + +struct DataTransform + input::AbstractInputTransform + output::AbstractOutputTransform +end + +function DataTransform( + data::DataFrame, + instandard::StandardizeInput, + input::Union{Symbol, Vector{<:Symbol}}, + outstandard::StandardizeOutput, + output::Symbol +) + intransform = instandard.flag ? InputTransform(data, input) : NoInputTransform(input) + outtransform = outstandard.flag ? OutputTransform(data, output) : NoOutputTransform(output) + return DataTransform(intransform, outtransform) +end + +function DataTransform( + data::DataFrame, + instandard::StandardizeInput, + input::Union{UQInput, Vector{<:UQInput}}, + outstandard::StandardizeOutput, + output::Symbol +) + intransform = instandard.flag ? UQInputTransform(input) : NoInputTransform(input) + outtransform = outstandard.flag ? OutputTransform(data, output) : NoOutputTransform(output) + return DataTransform(intransform, outtransform) +end + +# Utility functions to handle DataFrames +function _dataframe_to_array( + df::DataFrame, + name::Symbol +) + return df[:, name] +end + +function _dataframe_to_array( + df::DataFrame, + names::Vector{<:Symbol} +) + # check for the case where we want a single column but the name is given in a Vector + length(names) == 1 ? X = _dataframe_to_array(df, only(names)) : X = RowVecs(Matrix(df[:, names])) + return X +end + +### No transform, default +struct NoInputTransform <: AbstractInputTransform + input::Union{Symbol, Vector{<:Symbol}} +end + +NoInputTransform(input::Union{UQInput, Vector{<:UQInput}}) = NoInputTransform(names(input)) + +(transform::NoInputTransform)(df::DataFrame) = _dataframe_to_array(df, transform.input) + + +### ZScore transform +struct InputTransform <: AbstractInputTransform + transform::StatsBase.ZScoreTransform + input::Union{Symbol, Vector{<:Symbol}} +end + +# Construct from DataFrame +function InputTransform( + df::DataFrame, + input::Union{Symbol, Vector{<:Symbol}} +) + X = _dataframe_to_array(df, input) + transform = fit(ZScoreTransform, X; dims=1) + return InputTransform(transform, input) +end + +function (transform::InputTransform)(df::DataFrame) + X = _dataframe_to_array(df, transform.input) + return StatsBase.transform(transform.transform, X) +end + +### Standard normal transform +struct UQInputTransform <: AbstractInputTransform + uqinput::Union{UQInput, Vector{<:UQInput}} +end + +function (transform::UQInputTransform)(df::DataFrame) + df_copy = copy(df) + uqinput_names = names(transform.uqinput) + to_standard_normal_space!(transform.uqinput, df_copy) + # X is a Matrix for multiple inputs, else it is a Vector + X = _dataframe_to_array(df_copy, uqinput_names) + return X +end + +### No Outputtransform, default +struct NoOutputTransform <: AbstractOutputTransform + output::Symbol +end + +(transform::NoOutputTransform)(df::DataFrame) = _dataframe_to_array(df, transform.output) +inverse_transform(Y::Array, transform::NoOutputTransform) = Y + +### ZScore output transform +struct OutputTransform <: AbstractOutputTransform + transform::StatsBase.ZScoreTransform # there is probably a better way to do this + output::Symbol +end + +# Construct from DataFrame +function OutputTransform( + df::DataFrame, + output::Symbol +) + Y = _dataframe_to_array(df, output) # will fail if Y is not an array + transform = fit(ZScoreTransform, Y; dims=1) + return OutputTransform(transform, output) +end + 
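# Editor's sketch (not part of the patch): how the z-score pair above works together with
# the functor and inverse_transform defined just below; df_example is a hypothetical frame.
df_example = DataFrame(x = 1:3, y = [1.0, 2.0, 3.0])
t = OutputTransform(df_example, :y)
y_std = t(df_example)            # ≈ [-1.0, 0.0, 1.0] after standardization
inverse_transform(y_std, t)      # recovers df_example.y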
+function (transform::OutputTransform)(df::DataFrame) + Y = _dataframe_to_array(df, transform.output) + return StatsBase.transform(transform.transform, Y) +end + +function inverse_transform(Y::Array, transform::OutputTransform) + return StatsBase.reconstruct(transform.transform, Y) +end \ No newline at end of file From 95ee9c9f7f8beed7a8ff4f5683e8c6778d9b9004 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:08:48 +0100 Subject: [PATCH 039/117] Simple demo of current state --- demo/metamodels/gaussianprocess.jl | 116 +++++++++++------------------ 1 file changed, 44 insertions(+), 72 deletions(-) diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl index 762d8dc7d..4a5b8ee58 100644 --- a/demo/metamodels/gaussianprocess.jl +++ b/demo/metamodels/gaussianprocess.jl @@ -1,82 +1,54 @@ -using UncertaintyQuantification, Plots -using AbstractGPs # not sure if to reexport -using ParameterHandling # not sure if to reexport -using Optim +using UncertaintyQuantification +using AbstractGPs using Random -Random.seed!(20140430) -n = 10 -input_symbol = :x -output_symbol = :y - -# Input_symbol for passing a UQModel to GaussianProcess -x = RandomVariable(Uniform(0, 2π), input_symbol) -y = Model( - df -> - (sin.(df.x) + 0.05*randn(length(df.x))), - output_symbol, +# Setup Himmelblau example +x = RandomVariable.(Uniform(-5, 5), [:x1, :x2]) +himmelblau = Model( + df -> (df.x1 .^ 2 .+ df.x2 .- 11) .^ 2 .+ (df.x1 .+ df.x2 .^ 2 .- 7) .^ 2, :y ) -exp_design = ExperimentalDesign(MonteCarlo(n)) - -# Input_symbol for passing a DataFrame to GaussianProcess -df = sample(x, n) -evaluate!(y, df) - -# Define how the GP is build -function build_gp(θ::NamedTuple) - k1 = (θ.SE.σ)^2 * with_lengthscale(SqExponentialKernel(), θ.SE.ℓ) - k2 = (θ.RQ.σ)^2 * with_lengthscale(RationalQuadraticKernel(; α=θ.RQ.α), θ.RQ.ℓ) - return GP(k1+k2) -end - -# gp parameters -θ = (; - SE = (; - σ = positive(1.), - ℓ = positive(1.) - ), - RQ = (; - σ = positive(1.), - ℓ = positive(1.), - α = positive(exp(-1.0)) - ) +design = FullFactorial([8, 8]) +training_data = sample(x, design) +evaluate!(himmelblau, training_data) + +# This will be used for proper initial guesses for the parameters of the GP +mean_data = mean(training_data[!, :y]) +std_data = std(training_data[!, :y]) + +# Setup the GP +# Note: If we do not initialize the parameters here properly the optimization will fail. Standardization should help with that. 
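# Editor's note (not part of the patch): `SqExponentialKernel() ∘ ARDTransform([1.0, 1.0])`
# gives the kernel one scaling parameter per input dimension, and `with_gaussian_noise`
# (defined in parameterization.jl) wraps the prior so that `gp(x)` below is shorthand for
# `gp.gp(x, σ²)`, a FiniteGP with observation-noise variance σ².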
+σ² = 1e-5 +kernel = SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]) +gp = with_gaussian_noise(GP(0.0, kernel), σ²) + +optimizer = MLE() +# TODO: StandardizeInput breaks currently due to -Inf and Inf values from to_standard_normal_space!() +# TODO: Optimization is extremely unstable +# TODO: Not all kernels have a extract_parameters and apply_parameters function +gpr = GaussianProcess( + gp, x, himmelblau, :y, design, StandardizeOutput(), MLE() ) -# noise -noise = fixed(exp(-2.)) +test_data = sample(x, 1000) +evaluate!(gpr, test_data) -# Fit GP to data from experimental design -gp_from_model = GaussianProcess( - x, - y, - :y, - exp_design, - build_gp, - θ, - noise -) +p_data = test_data[:, [:x1, :x2]] +evaluate!(himmelblau, p_data) -# Optimize hyperparameters (There should be a method that allows to do this on a already fitted gp instance) -opt_gp_from_model = GaussianProcess( - x, - y, - :y, - exp_design, - build_gp, - θ, - noise, - false, - false, - LBFGS() -) +mse = mean((p_data.y .- test_data.y) .^ 2) +println("MSE is: $mse") -function plotdata() - plot(; xlabel="x", ylabel="y", legend=:bottomright) - return scatter!(df[:, 1], df[:, 2]; label="training data", ms=2, markerstrokewidth=0) +using Plots +using DataFrames +a = range(-5, 5; length=1000) +b = range(5, -5; length=1000) +himmelblau_f(x1, x2) = (x1^2 + x2 - 11)^2 + (x1 + x2^2 - 7)^2 +function gpr_f(x, y) + df = DataFrame(x1 = x, x2 = y) + evaluate!(gpr, df) + return only(df[:, :y]) end -plot_gp!(f; label) = plot!(f(sort!(sample(x, 100)[:, 1])); ribbon_scale=2, linewidth=1, label) - -plotdata() -plot_gp!(gp_from_model.gp; label="posterior f(⋅)") -plot_gp!(opt_gp_from_model.gp; label="posterior f(⋅) optimized") \ No newline at end of file +s1 = surface(a, b, himmelblau_f; plot_title="Himmelblau's function") +s2 = surface(a, b, gpr_f; plot_title="Gaussian process regression") +plot(s1, s2, layout = (1, 2), legend = false) \ No newline at end of file From 19c8f424a04586eefa405ada086a36898f051f0e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:09:27 +0100 Subject: [PATCH 040/117] Added gaussian process exports --- src/UncertaintyQuantification.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 6d3a1c523..d8c3d2dc9 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -106,6 +106,7 @@ export LeastSquares export LegendreBasis export LineSampling export SingleComponentMetropolisHastings +export MLE export Model export MonteCarlo export ParallelModel @@ -120,6 +121,8 @@ export RandomSlicing export ResponseSurface export SobolSampling export Solver +export StandardizeInput +export StandardizeOutput export SubSetInfinity export SubSetInfinityAdaptive export SubSetSimulation @@ -152,6 +155,7 @@ export to_copula_space export to_physical_space! export to_standard_normal_space export to_standard_normal_space! 
+export with_gaussian_noise include("inputs/empiricaldistribution.jl") include("inputs/inputs.jl") @@ -173,7 +177,9 @@ include("models/imprecise/propagation.jl") include("models/polyharmonicspline.jl") include("models/responsesurface.jl") include("models//slicingmodel.jl") -include("models/gp/datamanipulation.jl") +include("models/gp/standardization.jl") +include("models/gp/parameterization.jl") +include("models/gp/hyperparametertuning.jl") include("models/gp/gaussianprocess.jl") include("hpc/slurm.jl") From 0a9cdf94c8947cb992542239e4906fd6cf5daf28 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:09:50 +0100 Subject: [PATCH 041/117] Refactoring --- src/models/gp/datamanipulation.jl | 55 -------------------- src/models/gp/normalization.jl | 84 ------------------------------- 2 files changed, 139 deletions(-) delete mode 100644 src/models/gp/datamanipulation.jl delete mode 100644 src/models/gp/normalization.jl diff --git a/src/models/gp/datamanipulation.jl b/src/models/gp/datamanipulation.jl deleted file mode 100644 index e89158eef..000000000 --- a/src/models/gp/datamanipulation.jl +++ /dev/null @@ -1,55 +0,0 @@ -function _dataframe_to_array( # That name sucks - df::DataFrame, - name::Union{Symbol, String} # do we use Strings? -) - return df[:, name] -end - -function _dataframe_to_array( # That name sucks - df::DataFrame, - names::Union{Vector{<:Symbol}, Vector{<:String}} # do we use Strings? -) - # check for the case where we want a single column but the name is given in a Vector - length(names) == 1 ? X = _dataframe_to_array(df, only(names)) : X = Matrix(df[:, names]) - return X -end - -function _handle_gp_input( - data::DataFrame, - input::Union{Symbol, UQInput}, - output::Symbol, - normalize_inp::Bool=false, - normalize_out::Bool=false -) - inp_dim = 1 - out_dim = 1 - - if isa(input, Symbol) - inp_transformer = InputTransformer(data, input, normalize_inp) - else - inp_transformer = UQInputTransformer(input, normalize_inp) - end - out_transformer = OutputTransformer(data, output, normalize_out) - - # Turn DataFrame samples into X and Y arrays for GP - x = inp_transformer(data) - y = out_transformer(data) - - return ( - inp_dim, out_dim, - inp_transformer, out_transformer, - x, y) -end - -function _handle_gp_input( - data::DataFrame, - inputs::Union{Vector{Symbol}, Vector{UQInput}}, - output::Symbol, - normalize_inp::Bool=false, - normalize_out::Bool=false -) - return _handle_gp_input( - data, inputs, [output], - normalize_inp, normalize_out - ) -end \ No newline at end of file diff --git a/src/models/gp/normalization.jl b/src/models/gp/normalization.jl deleted file mode 100644 index f6bbbd1b0..000000000 --- a/src/models/gp/normalization.jl +++ /dev/null @@ -1,84 +0,0 @@ -abstract type AbstractInputTransform end -abstract type AbstractOutputTransform end - -### No tranform, default -struct NoInputTransform <: AbstractInputTransform end - -function (transform::NoInputTransform)(df::DataFrame, input_names::Union{Symbol, Vector{<:Symbol}}) - X = _dataframe_to_array(df, input_names) - return X -end - -### ZScore transform -struct InputTransform <: AbstractInputTransform - transform::StatsBase.ZScoreTransform - input_names::Union{Symbol, Vector{<:Symbol}} -end - -# Construct from DataFrame -function InputTransform( - df::DataFrame, - input_names::Union{Symbol, Vector{<:Symbol}} -) - X = _dataframe_to_array(df, input_names) - transform = fit(ZScoreTransform, X; dims=1) - return InputTransform(transform, input_names) -end - -function (transform::InputTransform)(df::DataFrame) - X = 
_dataframe_to_array(df, transform.input_names) - return StatsBase.transform(transform.transform, X) -end - -### Standard normal transform -struct UQInputTransform <: AbstractInputTransform - uqinputs::Union{UQInput, Vector{<:UQInput}} -end - -function (transform::UQInputTransform)(df::DataFrame) - df_copy = copy(df) - to_standard_normal_space!(names(transform.uqinputs), df_copy) - # X is a Matrix for multiple inputs, else it is a Vector - X = _dataframe_to_array(df_copy, uqinput_names) - return X -end - -### No Outputtransform, default -struct NoOutputTransform <: AbstractOutputTransform - # we do not support multioutput yet - output_names::Union{Symbol, Vector{<:Symbol}} # need to handle uqinput -end - -function (transform::NoOutputTransform)(df::DataFrame) - Y = _dataframe_to_array(df, transform.output_names) - return Y -end - -function inverse_transform(Y::Array, transform::NoOutputTransform) - return Y -end - -### ZScore output transform -struct OutputTransform <: AbstractOutputTransform - transform::StatsBase.ZScoreTransform # there is probably a better way to do this - output_names::Union{Symbol, Vector{<:Symbol}} -end - -# Construct from DataFrame -function OutputTransform( - df::DataFrame, - output_names::Union{Symbol, Vector{<:Symbol}} -) - Y = _dataframe_to_array(df, output_names) # will fail if Y is not an array - transform = fit(ZScoreTransform, Y; dims=1) - return OutputTransform(transform, output_names) -end - -function (transform::OutputTransform)(df::DataFrame) - Y = _dataframe_to_array(df, transform.output_names) - return StatsBase.transform(transform.transform, Y) -end - -function inverse_transform(Y::Array, transform::OutputTransform) - return StatsBase.reconstruct(transform.transform, Y) -end \ No newline at end of file From 8d5934481ca48ccd834e9e71093ab92ac62a6513 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:10:18 +0100 Subject: [PATCH 042/117] Simplified optimization routine for NoOptimization --- src/models/gp/hyperparametertuning.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index cf6ffda23..39426019b 100644 --- a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -2,11 +2,8 @@ abstract type AbstractHyperparameterOptimization end struct NoOptimization <: AbstractHyperparameterOptimization end -function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, mle::NoOptimization) #!TYPES - # This is completely unnecessary, should maybe write a GP method for NoOpt - model, θ₀ = parameterize(gp) - flatparams, unflatten = ParameterHandling.flatten(θ₀) - return model(unflatten(flatparams)), unflatten(flatparams) +function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, opt::NoOptimization) #!TYPES + return gp end struct MLE <: AbstractHyperparameterOptimization @@ -44,5 +41,5 @@ function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, mle: end result = optimize(Optim.only_fg!(fg!), flatparams, mle.optimizer, mle.options; inplace=false) - return model(unflatten(result.minimizer)), unflatten(result.minimizer) + return model(unflatten(result.minimizer)) end \ No newline at end of file From 96efb0482e0e4adce08b3b789f1c2b2e01a37efa Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:10:52 +0100 Subject: [PATCH 043/117] Added parameter routines for ARDTransform --- src/models/gp/parameterization.jl | 111 ++++++++---------------------- 1 file changed, 29 
insertions(+), 82 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index 26a82926b..33be8c53b 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -6,15 +6,31 @@ function (p::Parameterized)(θ) return apply_parameters(p.object, ParameterHandling.value(θ)) end -""" - parameterize(object) -> model, θ +# """ +# parameterize(object) -> model, θ -Turn `object` into a callable parameterized version of itself and a parameter `θ`. -After assigning `model, θ = parameterize(object)`, calling `model(θ)` will yield the same -`object` back. -""" +# Turn `object` into a callable parameterized version of itself and a parameter `θ`. +# After assigning `model, θ = parameterize(object)`, calling `model(θ)` will yield the same +# `object` back. +# """ parameterize(object) = Parameterized(object), extract_parameters(object) +# Custom wrappers +struct NoisyGP{T<:GP,Tn<:Real} + gp::T + obs_noise::Tn +end + +(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) + +with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) + +extract_parameters(f::NoisyGP) = ( + extract_parameters(f.gp), + ParameterHandling.positive(f.obs_noise, exp, 1e-6) + ) +apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) + # Mean functions extract_parameters(::ZeroMean) = nothing apply_parameters(m::ZeroMean, θ) = m @@ -34,6 +50,9 @@ apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=[θ]) extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(only(k.α)) apply_parameters(k::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=θ, metric=k.metric) +extract_parameters(k::ConstantKernel) = ParameterHandling.positive(only(k.c)) +apply_parameters(k::ConstantKernel, θ) = ConstantKernel(; c=θ) + # Composite kernels extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) @@ -50,87 +69,15 @@ extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHa apply_parameters(k::ScaledKernel, θ) = ScaledKernel(apply_parameters(k.kernel, θ[1]), θ[2]) # Transforms +# !WARNING: Incomplete extract_parameters(t::ScaleTransform) = ParameterHandling.positive(only(t.s)) apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ) -# # Likelihoods -# extract_parameters(::BernoulliLikelihood) = nothing -# apply_parameters(l::BernoulliLikelihood, θ) = l -# _isequal(l1::T, l2::T) where {T<:BernoulliLikelihood} = true - -# extract_parameters(::PoissonLikelihood) = nothing -# apply_parameters(l::PoissonLikelihood, θ) = l -# _isequal(l1::T, l2::T) where {T<:PoissonLikelihood} = true +extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) +apply_parameters(::ARDTransform, θ) = ARDTransform(θ) # GPs extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) apply_parameters(f::GP, θ) = GP( apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]) - ) - -# extract_parameters(f::LatentGP) = (extract_parameters(f.f), extract_parameters(f.lik)) -# function apply_parameters(f::LatentGP, θ) -# return LatentGP(apply_parameters(f.f, θ[1]), apply_parameters(f.lik, θ[2]), f.Σy) -# end - -# # Approximations -# const SVA = SparseVariationalApproximation - -# function extract_parameters(sva::SVA, fixed_inducing_points::Bool) -# fz_par = fixed_inducing_points ? 
nothing : collect(sva.fz.x) -# q_par = extract_parameters(sva.q) -# return (fz_par, q_par) -# end - -# function apply_parameters(sva::SVA, θ) -# fz = isnothing(θ[1]) ? sva.fz : sva.fz.f(θ[1]) -# q = apply_parameters(sva.q, θ[2]) -# return SVA(fz, q) -# end - -# variational_gaussian(n::Int, T=Float64) = MvNormal(zeros(T, n), Matrix{T}(I, n, n)) - -# # Distributions -# extract_parameters(d::MvNormal) = (d.μ, ParameterHandling.positive_definite(d.Σ)) -# apply_parameters(::MvNormal, θ) = MvNormal(θ[1], θ[2]) -# _isequal(d1::MvNormal, d2::MvNormal) = isapprox(d1.μ, d1.μ) && isapprox(d1.Σ, d2.Σ) - -# Custom wrappers -struct NoisyGP{T<:GP,Tn<:Real} - gp::T - obs_noise::Tn -end - -(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) - -with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) - -extract_parameters(f::NoisyGP) = ( - extract_parameters(f.gp), - ParameterHandling.positive(f.obs_noise, exp, 1e-6) - ) -apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) - -# function _isequal(f1::NoisyGP, f2::NoisyGP) -# return _isequal(f1.gp, f2.gp) && isapprox(f1.obs_noise, f2.obs_noise) -# end - -# struct SVGP{T<:LatentGP,Ts<:SVA} -# lgp::T -# sva::Ts -# fixed_inducing_points::Bool -# end - -# SVGP(lgp, sva; fixed_inducing_points) = SVGP(lgp, sva, fixed_inducing_points) - -# function extract_parameters(f::SVGP) -# return (extract_parameters(f.lgp), extract_parameters(f.sva, f.fixed_inducing_points)) -# end - -# function apply_parameters(f::SVGP, θ) -# lgp = apply_parameters(f.lgp, θ[1]) -# sva = apply_parameters(f.sva, θ[2]) -# return SVGP(lgp, SVA(lgp(sva.fz.x).fx, sva.q), f.fixed_inducing_points) -# end - -# costfunction(svgp::SVGP, data) = -elbo(svgp.sva, svgp.lgp(data.x), data.y) \ No newline at end of file + ) \ No newline at end of file From ca876d871bf6f43a9bce6e0b51ce846ea45cd977 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 11 Feb 2025 19:12:05 +0100 Subject: [PATCH 044/117] Added constructors and evaluate --- src/models/gp/gaussianprocess.jl | 300 ++++++++++++++++--------------- 1 file changed, 158 insertions(+), 142 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 27f01d500..1aa79fd8c 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -1,51 +1,49 @@ -""" - GaussianProcess(data::DataFrame, dependendVarName::Symbol, deg::Int, dim::Int) - -Creates a gaussian process prior .... 
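# Editor's note (not part of the patch): from this commit the struct below stores a fitted
# AbstractGPs.PosteriorGP together with the DataTransform from standardization.jl, so the
# same input/output standardization is reused whenever the surrogate is evaluated on new data.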
- -# Examples -```jldoctest -julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); - -julia> rs = ResponseSurface(data, :y, 2) |> DisplayAs.withcontext(:compact => true) -ResponseSurface([0.483333, -0.238636, 1.01894], :y, [:x], 2, Monomial{Commutative{CreationOrder}, Graded{LexOrder}}[1, x₁, x₁²]) -``` -""" struct GaussianProcess <: UQModel - gp::AbstractGPs.FiniteGP - input::Vector{Symbol} + gp::AbstractGPs.PosteriorGP + input::Union{Symbol, Vector{Symbol}} output::Symbol - input_transform::AbstractInputTransform # not sure if these should transform hyperparams as well - output_transform::AbstractOutputTransform + datatransformer::DataTransform end -""" ---------------------------------------------------------------------------------------------------- """ -# names = propertynames(data[:, Not(output)]) -## DATAFRAME INPUT OUTPUT STUFF +# Build from Dataframe function GaussianProcess( - gp::AbstractGPs.GP, + gp::Union{AbstractGPs.GP, NoisyGP}, data::DataFrame, output::Symbol, - input_transform::NoInputTransform, - output_transform::NoOutputTransform, - optimization::NoOptimization + instandard::StandardizeInput, + outstandard::StandardizeOutput, + optimization::AbstractHyperparameterOptimization ) # should we use keyword args? - - return + input = propertynames(data[:, Not(output)]) + datatransformer = DataTransform( + data, instandard, input, outstandard, output + ) + x = datatransformer.input(data) + y = datatransformer.output(data) + optimized_gp = optimize_hyperparameters(gp, x, y, optimization) + posterior_gp = posterior(optimized_gp(x), y) + return GaussianProcess( + posterior_gp, + input, + output, + datatransformer + ) end function GaussianProcess( gp::AbstractGPs.GP, data::DataFrame, output::Symbol, + instandard::StandardizeInput, + optimization::AbstractHyperparameterOptimization ) # should we use keyword args? return GaussianProcess( gp, data, output, - NoInputTransform(), - NoOutputTransform(), - NoOptimization() + instandard, + StandardizeOutput(false), + optimization ) end @@ -53,147 +51,165 @@ function GaussianProcess( gp::AbstractGPs.GP, data::DataFrame, output::Symbol, - input_transform::NoInputTransform, - output_transform::NoOutputTransform, + outstandard::StandardizeOutput, optimization::AbstractHyperparameterOptimization ) # should we use keyword args? - - return + return GaussianProcess( + gp, + data, + output, + StandardizeInput(false), + outstandard, + optimization + ) end function GaussianProcess( gp::AbstractGPs.GP, data::DataFrame, - output::Symbol; - input_transform::AbstractInputTransformer, - output_transform::AbstractOutputTransformer, + output::Symbol, optimization::AbstractHyperparameterOptimization -) - return +) # should we use keyword args? + return GaussianProcess( + gp, + data, + output, + StandardizeInput(false), + StandardizeOutput(false), + optimization + ) end -# Custom meanfunctions will break Zygote autodiff for multidimensional inputs -# Create from DataFrame function GaussianProcess( + gp::AbstractGPs.GP, data::DataFrame, - inputs::Symbol, - outputs::Symbol, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
- normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing -) - (inp_dim, out_dim, - inp_transformer, out_transformer, - x, y) = _handle_gp_input( - data, inputs, outputs, - normalize_inp, normalize_out - ) - - θ = (; - mean_and_kernel = params, - noise = (;noise_params = noise) + output::Symbol +) # should we use keyword args? + return GaussianProcess( + gp, + data, + output, + StandardizeInput(false), + StandardizeOutput(false), + NoOptimization() ) +end - gp = build_gp_posterior(build_gp, θ, x, y, optimizer) +# Build with UQmodel +function GaussianProcess( + gp::Union{AbstractGPs.GP, NoisyGP}, + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, + instandard::StandardizeInput, + outstandard::StandardizeOutput, + optimization::AbstractHyperparameterOptimization +) + data = sample(input, experimentaldesign) # need to be able to pass experimental design + evaluate!(model, data) + datatransformer = DataTransform( + data, instandard, input, outstandard, output + ) + x = datatransformer.input(data) + y = datatransformer.output(data) + optimized_gp = optimize_hyperparameters(gp, x, y, optimization) + posterior_gp = posterior(optimized_gp(x), y) return GaussianProcess( - gp, inputs, outputs, - inp_dim, out_dim, - inp_transformer, out_transformer - ) + posterior_gp, + names(input), + output, + datatransformer + ) end -""" ---------------------------------------------------------------------------------------------------- """ -## UQMODEL STUFF -""" experimental design """ -struct ExperimentalDesign # not sure about the name - sim::AbstractMonteCarlo # could also allow doe +function GaussianProcess( + gp::Union{AbstractGPs.GP, NoisyGP}, + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, + instandard::StandardizeInput, + optimization::AbstractHyperparameterOptimization +) + return GaussianProcess( + gp, + input, + model, + output, + experimentaldesign, + instandard, + StandardizeOutput(false), + optimization + ) end -""" GP from uqinput and model """ -# This creates a DataFrame and the calls the method above -# need to treat this differently because univariate in- and output function GaussianProcess( - inputs::UQInput, - model::Vector{<:UQModel}, - outputs::Symbol, - exp_design::ExperimentalDesign, - build_gp::Function, - params::NamedTuple, - noise::NoiseTypes=positive(exp(-2.0)), # could support functions for noise as well... 
- normalize_inp::Bool=false, - normalize_out::Bool=false, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing}=nothing + gp::Union{AbstractGPs.GP, NoisyGP}, + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, + outstandard::StandardizeOutput, + optimization::AbstractHyperparameterOptimization ) - data = sample(inputs, exp_design.sim) # need to be able to pass experimental design - evaluate!(model, data) - - # random_inputs = filter(i -> isa(i, RandomUQInput), inputs) - # random_input_names = names(random_inputs) - - (inp_dim, out_dim, - inp_transformer, out_transformer, - x, y) = _handle_gp_input( - data, inputs, outputs, - normalize_inp, normalize_out - ) - - θ = (; - mean_and_kernel = params, - noise = (;noise_params = noise) + return GaussianProcess( + gp, + input, + model, + output, + experimentaldesign, + StandardizeInput(false), + outstandard, + optimization ) +end - gp = build_gp_posterior(build_gp, θ, x, y, optimizer) - +function GaussianProcess( + gp::Union{AbstractGPs.GP, NoisyGP}, + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, + optimization::AbstractHyperparameterOptimization +) return GaussianProcess( - gp, inputs.name, outputs, - inp_dim, out_dim, - inp_transformer, out_transformer - ) + gp, + input, + model, + output, + experimentaldesign, + StandardizeInput(false), + StandardizeOutput(false), + optimization + ) end -# what should this calculate? Calculates mean for now -function evaluate!(gp::GaussianProcess, df::DataFrame) # this now gives mean and variance at input - x = gp.inp_transformer(df) - - if gp.inp_dim != 1 || gp.out_dim != 1 # here we have to reformat the input - x = MOInput(RowVecs(x), gp.out_dim) - end +function GaussianProcess( + gp::Union{AbstractGPs.GP, NoisyGP}, + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments} +) + return GaussianProcess( + gp, + input, + model, + output, + experimentaldesign, + StandardizeInput(false), + StandardizeOutput(false), + NoOptimization() + ) +end +# what should this calculate? Calculates only mean for now +function evaluate!(gp::GaussianProcess, data::DataFrame) + x = gp.datatransformer.input(data) y = mean(gp.gp(x)) - if gp.out_dim == 1 - y = inverse_transform(y, gp.out_transformer) - else - y = reshape(mean(gp.gp(x)), :, gp.out_dim) - y = inverse_transform(y, gp.out_transformer) - end - - insertcols!(df, (gp.output .=> eachcol(y))...) + data[!, gp.output] = inverse_transform(y, gp.datatransformer.output) return nothing -end - -function build_gp_posterior( - build_gp::Function, - θ::NamedTuple, - x::AbstractArray{<:Real}, - y::AbstractArray{<:Real}, - optimizer::Union{Optim.FirstOrderOptimizer, Nothing} -) - if isnothing(optimizer) - # If no optimizer is given we just conditionalize on output - gp = build_gp(ParameterHandling.value(θ.mean_and_kernel)) - fx = gp(x, only(ParameterHandling.value(θ.noise))^2) # this should be possible to do in a better way... - gp = posterior(fx, y) - else - # Use the passed optimizer to maximize marginal log likelihood - θ_opt, logml_ = maximize_logml(logml, θ, x, y, build_gp; optimizer=optimizer) # should I return the logml? 
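The `UQInput`-based constructors above draw the training set from an experimental design and run the model before fitting. A sketch of the intended call, assuming a single random variable, an analytical `Model` and a Monte Carlo design (all hypothetical):

```julia
using AbstractGPs

x = RandomVariable(Uniform(-π, π), :x)
m = Model(df -> sin.(df.x), :y)

prior = GP(SqExponentialKernel())

gpr = GaussianProcess(
    prior, x, m, :y,
    MonteCarlo(50),                  # experimental design used to sample the training points
    StandardizeInput(), StandardizeOutput(), NoOptimization()
)
```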
- gp = build_gp(ParameterHandling.value(θ_opt.mean_and_kernel)) - fx = gp(x, only(ParameterHandling.value(θ_opt.noise))^2) # this should be possible to do in a better way... - gp = posterior(fx, y) - end - return gp -end +end \ No newline at end of file From 867fdbcaa9739ff8d54cfaf53dbbc0114e74be5d Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:18:54 +0200 Subject: [PATCH 045/117] Added compat entries for AbstractGPs and Zygote --- Project.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 6fa10c923..9e5958abf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,6 @@ name = "UncertaintyQuantification" uuid = "7183a548-a887-11e9-15ce-a56ab60bad7a" -authors = [ - "Jasper Behrensdorf ", - "Ander Gray ", -] +authors = ["Jasper Behrensdorf ", "Ander Gray "] version = "0.12.0" [deps] @@ -24,6 +21,7 @@ MeshAdaptiveDirectSearch = "f4d74008-4565-11e9-04bd-4fe404e6a92a" Monomials = "272bfe72-f66c-432f-a94d-600f29493792" Mustache = "ffc61752-8dc7-55ee-8c37-f3e9cdd09e70" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" Primes = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" @@ -35,6 +33,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] +AbstractGPs = "0.5.24" Bootstrap = "2.2" CovarianceEstimation = "0.2" DataFrames = "0.22, 1.0" @@ -55,4 +54,5 @@ Reexport = "0.2, 1.0" Roots = "2.2.2" Statistics = "1" StatsBase = "0.33, 0.34" +Zygote = "0.7.10" julia = "1.10" From faf254e937a3a31e84547357bb0fbcc8f03d4839 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:22:12 +0200 Subject: [PATCH 046/117] Redesigned data standardization --- src/models/gp/standardization.jl | 251 ++++++++++++++++++------------- 1 file changed, 143 insertions(+), 108 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index 4dd59984f..acec3e7c0 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -1,139 +1,174 @@ -struct StandardizeInput - flag::Bool -end - -StandardizeInput() = StandardizeInput(true) +""" + AbstractInputTransform -struct StandardizeOutput - flag::Bool -end +Abstract type for input transformations used to describe how input features (columns of a DataFrame) should be +preprocessed before fitting a model (e.g. no transform, z-score standardization). +""" +abstract type AbstractInputTransform end -StandardizeOutput() = StandardizeOutput(true) +""" + AbstractOutputTransform -# DataTransform -abstract type AbstractInputTransform end +Abstract type for output transformations used to describe how model output (columns of a DataFrame) should be +preprocessed before fitting a model (e.g. no transform, z-score standardization). +""" abstract type AbstractOutputTransform end -struct DataTransform - input::AbstractInputTransform - output::AbstractOutputTransform -end +""" + DataStandardization(input::AbstractInputTransform, output::AbstractOutputTransform) -function DataTransform( - data::DataFrame, - instandard::StandardizeInput, - input::Union{Symbol, Vector{<:Symbol}}, - outstandard::StandardizeOutput, - output::Symbol -) - intransform = instandard.flag ? InputTransform(data, input) : NoInputTransform(input) - outtransform = outstandard.flag ? 
OutputTransform(data, output) : NoOutputTransform(output) - return DataTransform(intransform, outtransform) +Container that holds the input and output transformation strategies to be applied +to a dataset. +""" +struct DataStandardization + input_transform::AbstractInputTransform + output_transform::AbstractOutputTransform end -function DataTransform( - data::DataFrame, - instandard::StandardizeInput, - input::Union{UQInput, Vector{<:UQInput}}, - outstandard::StandardizeOutput, - output::Symbol -) - intransform = instandard.flag ? UQInputTransform(input) : NoInputTransform(input) - outtransform = outstandard.flag ? OutputTransform(data, output) : NoOutputTransform(output) - return DataTransform(intransform, outtransform) -end +DataStandardization() = DataStandardization(NoInputTransform(), NoOutputTransform()) -# Utility functions to handle DataFrames -function _dataframe_to_array( - df::DataFrame, - name::Symbol -) - return df[:, name] -end +# ---------------- Input transforms ---------------- -function _dataframe_to_array( - df::DataFrame, - names::Vector{<:Symbol} -) - # check for the case where we want a single column but the name is given in a Vector - length(names) == 1 ? X = _dataframe_to_array(df, only(names)) : X = RowVecs(Matrix(df[:, names])) - return X -end +""" + NoInputTransform(input) -### No transform, default -struct NoInputTransform <: AbstractInputTransform - input::Union{Symbol, Vector{<:Symbol}} -end +No transformation is applied to the specified input columns. +""" +struct NoInputTransform <: AbstractInputTransform end -NoInputTransform(input::Union{UQInput, Vector{<:UQInput}}) = NoInputTransform(names(input)) +""" + ZScoreInputTransform(input) -(transform::NoInputTransform)(df::DataFrame) = _dataframe_to_array(df, transform.input) +Applies z-score standardization (mean 0, variance 1) to the specified input columns. +""" +struct ZScoreInputTransform <: AbstractInputTransform end -### ZScore transform -struct InputTransform <: AbstractInputTransform - transform::StatsBase.ZScoreTransform - input::Union{Symbol, Vector{<:Symbol}} -end +# ---------------- Output transforms ---------------- -# Construct from DataFrame -function InputTransform( - df::DataFrame, - input::Union{Symbol, Vector{<:Symbol}} -) - X = _dataframe_to_array(df, input) - transform = fit(ZScoreTransform, X; dims=1) - return InputTransform(transform, input) -end +""" + NoOutputTransform(output) -function (transform::InputTransform)(df::DataFrame) - X = _dataframe_to_array(df, transform.input) - return StatsBase.transform(transform.transform, X) -end +No transformation is applied to the specified output column. +""" +struct NoOutputTransform <: AbstractOutputTransform end -### Standard normal transform -struct UQInputTransform <: AbstractInputTransform - uqinput::Union{UQInput, Vector{<:UQInput}} -end +""" + ZScoreOutputTransform(output) -function (transform::UQInputTransform)(df::DataFrame) - df_copy = copy(df) - uqinput_names = names(transform.uqinput) - to_standard_normal_space!(transform.uqinput, df_copy) - # X is a Matrix for multiple inputs, else it is a Vector - X = _dataframe_to_array(df_copy, uqinput_names) - return X -end +Applies z-score standardization (mean 0, variance 1) to the specified output column. +Provides both forward (`f`) and inverse (`f⁻¹`) transformations. 
+""" +struct ZScoreOutputTransform <: AbstractOutputTransform end + +# ---------------- Builders ---------------- -### No Outputtransform, default -struct NoOutputTransform <: AbstractOutputTransform - output::Symbol +""" + build_datatransform(data::DataFrame, transform::AbstractInputTransform) + +Builds a transformation function `f(df::DataFrame) -> Array` that converts +the specified input columns of `df` into an array, optionally applying a +standardization transform. +""" +function build_datatransform( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}}, + transform::NoInputTransform +) + f(df::DataFrame) = _dataframe_to_array(df, input) + return f end -(transform::NoOutputTransform)(df::DataFrame) = _dataframe_to_array(df, transform.output) -inverse_transform(Y::Array, transform::NoOutputTransform) = Y +build_datatransform( + data::DataFrame, + input::Union{UQInput, Vector{<:UQInput}}, + transform::NoInputTransform + ) = build_datatransform(data, names(input), transform) -### ZScore output transform -struct OutputTransform <: AbstractOutputTransform - transform::StatsBase.ZScoreTransform # there is probably a better way to do this - output::Symbol +function build_datatransform( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}}, + transform::ZScoreInputTransform +) + input_array = _dataframe_to_array(data, input) + zscore_transform = fit(ZScoreTransform, input_array; dims=1) + f(df::DataFrame) = StatsBase.transform( + zscore_transform, + _dataframe_to_array(df, input) + ) + return f +end + +build_datatransform( + data::DataFrame, + input::Union{UQInput, Vector{<:UQInput}}, + transform::ZScoreInputTransform + ) = build_datatransform(data, names(input), transform) + +""" + build_datatransform(data::DataFrame, transform::AbstractOutputTransform) + +Builds a tuple `(f, f⁻¹)` of transformation functions for the specified output column: + +- `f(df)` applies the output transformation to data. +- `f⁻¹(Y)` reverses the transformation for predictions. 
+""" +function build_datatransform( + data::DataFrame, + output::Symbol, + transform::NoOutputTransform +) + f(df::DataFrame) = _dataframe_to_array(df, output) + f⁻¹(Y::AbstractArray) = Y + return (f, f⁻¹) end -# Construct from DataFrame -function OutputTransform( - df::DataFrame, - output::Symbol +function build_datatransform( + data::DataFrame, + output::Symbol, + transform::ZScoreOutputTransform ) - Y = _dataframe_to_array(df, output) # will fail if Y is not an array - transform = fit(ZScoreTransform, Y; dims=1) - return OutputTransform(transform, output) + output_array = _dataframe_to_array(data, output) # will fail if Y is not an array + zscore_transform = fit(ZScoreTransform, output_array; dims=1) + f(df::DataFrame) = StatsBase.transform( + zscore_transform, + _dataframe_to_array(df, output) + ) + f⁻¹(Y::AbstractArray) = StatsBase.reconstruct(zscore_transform, Y) + return return (f, f⁻¹) end -function (transform::OutputTransform)(df::DataFrame) - Y = _dataframe_to_array(df, transform.output) - return StatsBase.transform(transform.transform, Y) +function build_datatransforms( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, + output::Symbol, + ds::DataStandardization +) + fᵢ = build_datatransform(data, input, ds.input_transform) + fₒ, fₒ⁻¹ = build_datatransform(data, output, ds.output_transform) + return (fᵢ, fₒ, fₒ⁻¹) end -function inverse_transform(Y::Array, transform::OutputTransform) - return StatsBase.reconstruct(transform.transform, Y) + +# ### Standard normal transform +# struct UQInputTransform <: AbstractInputTransform +# uqinput::Union{UQInput, Vector{<:UQInput}} +# end + +# function (transform::UQInputTransform)(df::DataFrame) +# df_copy = copy(df) +# uqinput_names = names(transform.uqinput) +# to_standard_normal_space!(transform.uqinput, df_copy) +# # X is a Matrix for multiple inputs, else it is a Vector +# X = _dataframe_to_array(df_copy, uqinput_names) +# return X +# end + +# ---------------- Utility ---------------- + +_dataframe_to_array(df::DataFrame, name::Symbol) = df[:, name] + +function _dataframe_to_array(df::DataFrame, names::Vector{<:Symbol}) + length(names) == 1 ? 
x = _dataframe_to_array(df, only(names)) : x = RowVecs(Matrix(df[:, names])) + return x end \ No newline at end of file From 1b373d2a9f11e8debc1734cd51c1c9cee84b30f5 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:23:07 +0200 Subject: [PATCH 047/117] Added DifferentiationInterface and compat entries --- Project.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Project.toml b/Project.toml index 9e5958abf..69ce21ed4 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Dierckx = "39dd38d3-220a-591b-8e3c-4c3a8c710a94" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838" @@ -39,6 +40,7 @@ CovarianceEstimation = "0.2" DataFrames = "0.22, 1.0" DelimitedFiles = "1" Dierckx = "0.5" +DifferentiationInterface = "0.7.4" Distributions = "0.24, 0.25" FastGaussQuadrature = "0.4, 0.5, 1" FiniteDifferences = "0.12" @@ -47,6 +49,7 @@ MeshAdaptiveDirectSearch = "0.1.0" Monomials = "1.0" Mustache = "1.0" Optim = "1.9.4" +ParameterHandling = "0.5.0" Primes = "0.5" QuadGK = "2.11.1" QuasiMonteCarlo = "0.3" From 39f2a53a91dfce4b54b44bd8c5b88be2911bc3a2 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:24:16 +0200 Subject: [PATCH 048/117] Using ParameterHandling for gaussian processes --- src/UncertaintyQuantification.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index dc0d776f3..2b6af319e 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -15,6 +15,7 @@ using MeshAdaptiveDirectSearch using Monomials using Mustache using Optim +using ParameterHandling using Primes using QuadGK using QuasiMonteCarlo @@ -125,7 +126,7 @@ export LineSampling export SingleComponentMetropolisHastings export MaximumAPosterioriBayesian export MaximumLikelihoodBayesian -export MLE +export MaximumLikelihoodEstimation export Model export MonteCarlo export ParallelModel From 740603600306c539ebfac42f46f3df5c9ffee9af Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:25:05 +0200 Subject: [PATCH 049/117] Refactoring of type II maximum likelihood estimation --- src/models/gp/hyperparametertuning.jl | 70 ++++++++++++++------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index 39426019b..d5edc4ba9 100644 --- a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -1,45 +1,49 @@ +using DifferentiationInterface + abstract type AbstractHyperparameterOptimization end struct NoOptimization <: AbstractHyperparameterOptimization end -function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, opt::NoOptimization) #!TYPES - return gp -end - -struct MLE <: AbstractHyperparameterOptimization +struct MaximumLikelihoodEstimation <: AbstractHyperparameterOptimization optimizer::Optim.FirstOrderOptimizer options::Optim.Options end -MLE() = MLE(Optim.LBFGS(), Optim.Options(; iterations=1000, show_trace=true)) - -objective(f::Union{AbstractGPs.GP, NoisyGP}, 
x, y, mle::MLE) = -logpdf(f(x), y) +MaximumLikelihoodEstimation() = MaximumLikelihoodEstimation( + Optim.LBFGS(), + Optim.Options(; iterations=1000, show_trace=false) +) + +function optimize_hyperparameters( + gp::Union{AbstractGPs.GP, NoisyGP}, + x, + y, + opt::NoOptimization +) + return gp +end -function optimize_hyperparameters(gp::Union{AbstractGPs.GP, NoisyGP}, x, y, mle::MLE) #!TYPES +objective( + f::Union{AbstractGPs.GP, NoisyGP}, + x, + y, + mle::MaximumLikelihoodEstimation +) = -logpdf(f(x), y) + +function optimize_hyperparameters( + gp::Union{AbstractGPs.GP, NoisyGP}, + x, + y, + mle::MaximumLikelihoodEstimation +) #!TYPES model, θ₀ = parameterize(gp) - flatparams, unflatten = ParameterHandling.flatten(θ₀) - - ## https://julianlsolvers.github.io/Optim.jl/stable/#user/tipsandtricks/#avoid-repeating-computations - function fg!(F, G, θ) - if F !== nothing && G !== nothing - val, grad = Zygote.withgradient( - θ -> objective(model(unflatten(θ)), x, y, mle), - θ - ) - G .= only(grad) - return val - elseif G !== nothing - grad = Zygote.gradient( - θ -> objective(model(unflatten(θ)), x, y, mle), - θ - ) - G .= only(grad) - return nothing - elseif F !== nothing - return objective(model(unflatten(θ)), x, y, mle) - end - end - - result = optimize(Optim.only_fg!(fg!), flatparams, mle.optimizer, mle.options; inplace=false) + θ₀_flat, unflatten = ParameterHandling.flatten(θ₀) + + result = optimize( + θ -> objective(model(unflatten(θ)), x, y, mle), + θ₀_flat, + mle.optimizer, mle.options; + autodiff=AutoZygote() + ) return model(unflatten(result.minimizer)) end \ No newline at end of file From a61da82cf51c4a3c294de66b0dfd2b4d00a609be Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Tue, 19 Aug 2025 17:25:58 +0200 Subject: [PATCH 050/117] Added input and output transforms to gaussian process struct --- src/models/gp/gaussianprocess.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 1aa79fd8c..4ef688a21 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -2,7 +2,8 @@ struct GaussianProcess <: UQModel gp::AbstractGPs.PosteriorGP input::Union{Symbol, Vector{Symbol}} output::Symbol - datatransformer::DataTransform + input_transform::Function + output_transform::Function end # Build from Dataframe From 3bfd29e45f6980dd6f43530e0bd80ed7f1a264b5 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 20 Aug 2025 17:58:03 +0200 Subject: [PATCH 051/117] Preliminary file to test parameter extraction --- demo/metamodels/test.jl | 130 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 demo/metamodels/test.jl diff --git a/demo/metamodels/test.jl b/demo/metamodels/test.jl new file mode 100644 index 000000000..d20377744 --- /dev/null +++ b/demo/metamodels/test.jl @@ -0,0 +1,130 @@ +using ParameterHandling +using AbstractGPs + + +struct Parameterized{T} + object::T +end + +function (p::Parameterized)(θ) + return apply_parameters(p.object, ParameterHandling.value(θ)) +end + +parameterize(object) = Parameterized(object), extract_parameters(object) + +extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) +function apply_parameters(f::GP, θ) + return GP(apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2])) +end + +extract_parameters(m::ConstMean) = m.c +apply_parameters(::ConstMean, θ) = ConstMean(θ) + 
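The optimizer refactored in the previous patch relies on the chain `parameterize` → `ParameterHandling.flatten` → `Optim` → `unflatten` → `model(θ)`. A minimal sketch of that chain, using the handlers from src/models/gp/parameterization.jl and hypothetical data; this mirrors what `MaximumLikelihoodEstimation` does internally:

```julia
gp = with_gaussian_noise(GP(SqExponentialKernel()), 0.1)
x, y = rand(20), randn(20)

model, θ₀ = parameterize(gp)                       # callable model plus its free parameters
θ_flat, unflatten = ParameterHandling.flatten(θ₀)  # plain vector for Optim

nlml(θ) = -logpdf(model(unflatten(θ))(x), y)       # objective used for type-II MLE
nlml(θ_flat)                                       # value at the initial hyperparameters (here only the noise is free)
```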
+KernelsWithoutParameters = Union{SEKernel,Matern32Kernel,Matern52Kernel,WhiteKernel} + +extract_parameters(::T) where {T<:KernelsWithoutParameters} = nothing +apply_parameters(k::T, θ) where {T<:KernelsWithoutParameters} = k + +# ------------------------------------------ + +extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) +apply_parameters(k::TransformedKernel, θ) = TransformedKernel( + apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2]) + ) + +extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(k.r) +apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=θ) + +extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) +apply_parameters(::ARDTransform, θ) = ARDTransform(θ) + +extract_parameters(t::LinearTransform) = t.A +apply_parameters(::LinearTransform, θ) = LinearTransform(θ) + +extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) +apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) + +kernel = PeriodicKernel(2) +kernel = PeriodicKernel(1) + +transform = LinearTransform(rand(2,2)) + +extract_parameters(kernel) +extract_parameters(transform) + +A = rand(2,2) +kernel = SqExponentialKernel() ∘ LinearTransform(A) +kernel = (SqExponentialKernel() ∘ ARDTransform([1.0, 1.0])) ⊗ (SqExponentialKernel() ∘ LinearTransform(A)) +gp = GP(0.0, kernel) + +params = extract_parameters(gp) +model, θ = parameterize(gp) +θ_flat, unflatten = ParameterHandling.flatten(θ) +gp_model = model(unflatten(θ_flat)) + + +struct FixedTransform{T} <: Transform + component::T +end + +struct FixedKernel{T} <: Kernel + component::T +end + +fixed(t::Transform) = FixedTransform(t) +fixed(k::Kernel) = FixedKernel(k) + +extract_parameters(c::FixedTransform) = ParameterHandling.fixed(extract_parameters(c.component)) +apply_parameters(c::FixedTransform, θ) = apply_parameters(c.component, θ) + +extract_parameters(c::FixedKernel) = ParameterHandling.fixed(extract_parameters(c.component)) +apply_parameters(c::FixedKernel, θ) = apply_parameters(c.component, θ) + +A = rand(2,2) +kernel = fixed((SqExponentialKernel() ∘ LinearTransform(A)) + (SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]))) +gp = GP(0.0, kernel) + +params = extract_parameters(gp) +model, θ = parameterize(gp) +θ_flat, unflatten = ParameterHandling.flatten(θ) +gp_model = model(unflatten(θ_flat)) + +function collect_concrete_kernels(T::Type) + result = Set{Type}() + + function recurse(t) + for s in subtypes(t) + if isabstracttype(s) + recurse(s) # dive into abstract types + else + push!(result, s) # collect concrete type + end + end + end + + recurse(T) + return collect(result) +end + +# Retrieve all kernel types +all_kernels = collect_concrete_kernels(KernelFunctions.Kernel) +for k in all_kernels + println(k) +end +println("Found $(length(all_kernels)) concrete kernel types:") + +all_transforms = all_concrete_subtypes(KernelFunctions.Transform) +println("Found $(length(all_transforms)) concrete transformation types.") + +# Retrieve all transformation types +transform_types = all_transform_types() +println("Found $(length(transform_types)) transformation types.") + +for KT in all_kernel_types + try + obj = KT() # maybe you need default constructors + extract_parameters(obj) + catch e + println("Missing extract_parameters for $KT: $e") + end +end \ No newline at end of file From a853620104680342d71650f0cab58d932e466aa0 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 20 Aug 2025 20:41:07 +0200 Subject: [PATCH 
052/117] Add more kernels and transforms for automatic parameter handling --- src/models/gp/parameterization.jl | 240 +++++++++++++++++++++++------- 1 file changed, 186 insertions(+), 54 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index 33be8c53b..84ba3cdb5 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -1,83 +1,215 @@ +""" + Parameterized{T} + +Wraps an object of type `T` to make it callable with parameters `θ`. +Calling `p(θ)` returns the object updated with the parameters. +""" struct Parameterized{T} object::T end +""" + (p::Parameterized)(θ) + +Apply the parameters `θ` to the underlying object, returning a new object +with those parameters. +""" function (p::Parameterized)(θ) return apply_parameters(p.object, ParameterHandling.value(θ)) end -# """ -# parameterize(object) -> model, θ +""" + parameterize(object) -> (model, θ) -# Turn `object` into a callable parameterized version of itself and a parameter `θ`. -# After assigning `model, θ = parameterize(object)`, calling `model(θ)` will yield the same -# `object` back. -# """ +Wrap `object` into a `Parameterized` callable and return its current parameters `θ`. +Calling `model(θ)` will return the model with its current paramters. +""" parameterize(object) = Parameterized(object), extract_parameters(object) -# Custom wrappers -struct NoisyGP{T<:GP,Tn<:Real} - gp::T - obs_noise::Tn -end - -(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) - -with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) - -extract_parameters(f::NoisyGP) = ( - extract_parameters(f.gp), - ParameterHandling.positive(f.obs_noise, exp, 1e-6) - ) -apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) +# ---------------- Mean functions ---------------- +""" + extract_parameters(m::MeanType) -# Mean functions +Return the free parameters of a mean function, wrapped in `ParameterHandling`. +- `ZeroMean` has no parameters → returns `nothing`. +- `ConstMean` has one parameter → returns the constant value. +""" extract_parameters(::ZeroMean) = nothing -apply_parameters(m::ZeroMean, θ) = m - extract_parameters(m::ConstMean) = m.c -apply_parameters(::ConstMean, θ) = ConstMean(θ) - -# Simple kernels -KernelsWithoutParameters = Union{SEKernel,Matern32Kernel,Matern52Kernel,WhiteKernel} -extract_parameters(::T) where {T<:KernelsWithoutParameters} = nothing -apply_parameters(k::T, θ) where {T<:KernelsWithoutParameters} = k +""" + apply_parameters(m::MeanType, θ) -extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(only(k.r)) -apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=[θ]) - -extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(only(k.α)) -apply_parameters(k::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=θ, metric=k.metric) - -extract_parameters(k::ConstantKernel) = ParameterHandling.positive(only(k.c)) -apply_parameters(k::ConstantKernel, θ) = ConstantKernel(; c=θ) +Return a new mean function with parameters `θ` applied. +- For `ZeroMean`, returns the same object. +- For `ConstMean`, returns a new `ConstMean` with `c = θ`. 
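The extract/apply pair shown for the mean functions is the same pattern used for the kernels and transforms that follow; supporting an additional component only needs those two methods. A hypothetical sketch for a user-defined wrapper kernel with one positive parameter (`MyDampedKernel` is illustrative only and not part of this patch):

```julia
struct MyDampedKernel{K<:Kernel,T<:Real} <: Kernel
    kernel::K
    λ::T
end
(k::MyDampedKernel)(x, y) = exp(-k.λ) * k.kernel(x, y)

extract_parameters(k::MyDampedKernel) =
    (extract_parameters(k.kernel), ParameterHandling.positive(k.λ))
apply_parameters(k::MyDampedKernel, θ) =
    MyDampedKernel(apply_parameters(k.kernel, θ[1]), θ[2])
```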
+""" +apply_parameters(m::ZeroMean, θ) = m +apply_parameters(::ConstMean, θ) = ConstMean(θ) -# Composite kernels +# ---------------- Kernel functions ---------------- +# Kernels and transforms without parameters +BaseKernelsWithoutParameters = Union{ + ZeroKernel, WhiteKernel, CosineKernel, + SqExponentialKernel, ExponentialKernel, + ExponentiatedKernel, Matern32Kernel, + Matern52Kernel, NeuralNetworkKernel, + PiecewisePolynomialKernel, WienerKernel +} + +# TODO: GibbsKernel has a lengthscale function which could depend on trainable parameters +KernelsWithoutParameters = Union{GibbsKernel} + +# TODO: FunctionTransform has a transformation function which could depend on trainable parameters +TransformsWithoutParameters = Union{FunctionTransform, SelectTransform, IdentityTransform} + +AllWithoutParameters = Union{ + BaseKernelsWithoutParameters, + KernelsWithoutParameters, + TransformsWithoutParameters +} + +""" + extract_parameters(obj) + +Return the free parameters of `obj` wrapped in `ParameterHandling`. +- For kernels: positive or bounded constraints are enforced. +- For kernel compositions (sum, product, tensor, scaled, transformed), returns a tuple or vector of parameter sets. +- For kernels without parameters, returns `nothing`. +- For transforms: returns trainable parameters if any, otherwise `nothing`. +""" +# no paramters +extract_parameters(::T) where {T<:AllWithoutParameters} = nothing + +# basekernels (see KernelFunctions.jl src/basekernels) +extract_parameters(k::ConstantKernel) = ParameterHandling.positive(k.c) +extract_parameters(k::GammaExponentialKernel) = ParameterHandling.bounded(k.γ, 0.0, 2.0) +extract_parameters(k::FBMKernel) = ParameterHandling.bounded(k.h, 0.0, 1.0) +extract_parameters(k::MaternKernel) = ParameterHandling.positive(k.ν) +extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(k.r) +extract_parameters(k::LinearKernel) = ParameterHandling.positive(k.c) +extract_parameters(k::PolynomialKernel) = ParameterHandling.positive(k.c) +extract_parameters(k::RationalKernel) = ParameterHandling.positive(k.α) +extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(k.α) +extract_parameters(k::GammaRationalKernel) = ( + ParameterHandling.positive(k.α), + ParameterHandling.bounded(k.γ, 0.0, 2.0) +) + +# kernels (see KernelFunctions.jl src/kernels) +# TODO: NeuralKernelNetwork not implemented +extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels) extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) -apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) +extract_parameters(k::KernelTensorProduct) = map(extract_parameters, k.kernels) +extract_parameters(k::NormalizedKernel) = extract_parameters(k.kernel) +extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHandling.positive(only(k.σ²))) +extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) -extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels) +# transform (see KernelFunctions.jl src/transform) +extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) +extract_parameters(t::ChainTransform) = map(extract_parameters, t.transforms) +extract_parameters(t::LinearTransform) = t.A +extract_parameters(t::PeriodicTransform) = ParameterHandling.positive(t.f) +extract_parameters(t::ScaleTransform) = ParameterHandling.positive(t.s) + +""" + apply_parameters(obj, θ) + +Return a new object with parameters `θ` applied. 
+- Works for kernels, compositions, and transforms. +- For objects without parameters, returns the object unchanged. +- For compositions, expects a tuple or vector of parameter sets matching the structure. +""" +# no parameters +apply_parameters(k::T, θ) where {T<:AllWithoutParameters} = k + +# basekernels +apply_parameters(::ConstantKernel, θ) = ConstantKernel(; c=only(θ)) +apply_parameters(::GammaExponentialKernel, θ) = GammaExponentialKernel(; γ=only(θ)) +apply_parameters(::FBMKernel, θ) = FBMKernel(; h=only(θ)) +apply_parameters(::MaternKernel, θ) = MaternKernel(; ν=only(θ)) +apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=θ) +apply_parameters(::LinearKernel, θ) = LinearKernel(; c=only(θ)) +apply_parameters(::PolynomialKernel, θ) = PolynomialKernel(; c=only(θ)) +apply_parameters(::RationalKernel, θ) = RationalKernel(; α=only(θ)) +apply_parameters(::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=only(θ)) +apply_parameters(::GammaRationalKernel, θ) = GammaRationalKernel(; α=only(θ[1]), γ=only(θ[2])) + +# kernels +# TODO: NeuralKernelNetwork not implemented apply_parameters(k::KernelProduct, θ) = KernelProduct(map(apply_parameters, k.kernels, θ)) - -extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) +apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) +apply_parameters(k::KernelTensorProduct, θ) = KernelTensorProduct(map(apply_parameters, k.kernels, θ)) +apply_parameters(k::NormalizedKernel, θ) = NormalizedKernel(apply_parameters(k.kernel, θ)) +apply_parameters(k::ScaledKernel, θ) = ScaledKernel(apply_parameters(k.kernel, θ[1]), θ[2]) apply_parameters(k::TransformedKernel, θ) = TransformedKernel( apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2]) ) -extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHandling.positive(only(k.σ²))) -apply_parameters(k::ScaledKernel, θ) = ScaledKernel(apply_parameters(k.kernel, θ[1]), θ[2]) - -# Transforms -# !WARNING: Incomplete -extract_parameters(t::ScaleTransform) = ParameterHandling.positive(only(t.s)) +# transform +apply_parameters(::ARDTransform, θ) = ARDTransform(θ) +apply_parameters(t::ChainTransform, θ) = ChainTransform(map(apply_parameters, t.transforms, θ)) +apply_parameters(::LinearTransform, θ) = LinearTransform(θ) +apply_parameters(::PeriodicTransform, θ) = PeriodicTransform(θ) apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ) -extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) -apply_parameters(::ARDTransform, θ) = ARDTransform(θ) +# ---------------- Gaussian Processes ---------------- + +""" + extract_parameters(f::GP) -# GPs +Return the free parameters of a GP as a tuple: `(mean_params, kernel_params)`. +""" extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) + +""" + apply_parameters(f::GP, θ) + +Return a new GP with parameters `θ` applied: +- `θ[1]` → mean parameters +- `θ[2]` → kernel parameters +""" apply_parameters(f::GP, θ) = GP( - apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]) - ) \ No newline at end of file + apply_parameters(f.mean, θ[1]), + apply_parameters(f.kernel, θ[2]) +) + +""" + NoisyGP + +A wrapper around `GP` that adds Gaussian observation noise `obs_noise`. +""" +struct NoisyGP{T<:GP,Tn<:Real} + gp::T + obs_noise::Tn +end + +(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) + +""" + with_gaussian_noise(gp::GP, obs_noise::Real) + +Wrap a GP with Gaussian observation noise. 
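Putting the noise wrapper together with the constructors and the optimizer from the earlier patches, a hedged usage sketch (training data and kernel are hypothetical):

```julia
using AbstractGPs, DataFrames

data = DataFrame(x=randn(30), y=randn(30))      # hypothetical training data

prior = with_gaussian_noise(GP(2.0 * SqExponentialKernel()), 1e-3)

gpr = GaussianProcess(
    prior, data, :y,
    StandardizeInput(), StandardizeOutput(),
    MaximumLikelihoodEstimation()               # jointly tunes the kernel variance and the observation noise
)
```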
+""" +with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) + +""" + extract_parameters(f::NoisyGP) + +Return the free parameters of a noisy GP: +- `(gp_params, obs_noise_param)` +- Observation noise is constrained positive using `ParameterHandling`. +""" +extract_parameters(f::NoisyGP) = ( + extract_parameters(f.gp), + ParameterHandling.positive(f.obs_noise, exp, 1e-6) +) + +""" + apply_parameters(f::NoisyGP, θ) + +Return a new noisy GP with parameters `θ` applied: +- `θ[1]` → GP parameters +- `θ[2]` → observation noise +""" +apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) \ No newline at end of file From b27131a555f9866ac7bf34cd99a25dd634cc650a Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 20 Aug 2025 20:58:56 +0200 Subject: [PATCH 053/117] Minimize documentation overhead --- src/models/gp/parameterization.jl | 180 +++++++++++------------------- src/models/gp/standardization.jl | 61 ++-------- 2 files changed, 77 insertions(+), 164 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index 84ba3cdb5..d5162d8c7 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -1,53 +1,49 @@ """ - Parameterized{T} +Parameterized objects: a uniform interface for trainable models. -Wraps an object of type `T` to make it callable with parameters `θ`. -Calling `p(θ)` returns the object updated with the parameters. +`Parameterized(obj)` wraps an object so it can be called with a parameter vector `θ`: + ```julia + model, θ = parameterize(obj) + model(θ) # returns a new object with parameters applied + This works for mean functions, kernels, transformations, and Gaussian processes. + +The system relies on two core functions: + + 1. extract_parameters(obj) + + Returns the free parameters of obj wrapped in ParameterHandling containers. + Enforces constraints (e.g., positive or bounded) where applicable. + For composite objects (like KernelSum or GP), returns a tuple or vector of parameter sets. + Returns nothing for objects without trainable parameters. + + 2. apply_parameters(obj, θ) + + Returns a new object of the same type with parameters θ applied. + For hierarchical objects, θ is expected to match the structure returned by extract_parameters. + +This interface enables generic optimization routines to work across all supported types. """ + struct Parameterized{T} object::T end -""" - (p::Parameterized)(θ) - -Apply the parameters `θ` to the underlying object, returning a new object -with those parameters. -""" function (p::Parameterized)(θ) return apply_parameters(p.object, ParameterHandling.value(θ)) end -""" - parameterize(object) -> (model, θ) - -Wrap `object` into a `Parameterized` callable and return its current parameters `θ`. -Calling `model(θ)` will return the model with its current paramters. -""" parameterize(object) = Parameterized(object), extract_parameters(object) # ---------------- Mean functions ---------------- -""" - extract_parameters(m::MeanType) -Return the free parameters of a mean function, wrapped in `ParameterHandling`. -- `ZeroMean` has no parameters → returns `nothing`. -- `ConstMean` has one parameter → returns the constant value. -""" extract_parameters(::ZeroMean) = nothing -extract_parameters(m::ConstMean) = m.c - -""" - apply_parameters(m::MeanType, θ) - -Return a new mean function with parameters `θ` applied. -- For `ZeroMean`, returns the same object. -- For `ConstMean`, returns a new `ConstMean` with `c = θ`. 
-""" apply_parameters(m::ZeroMean, θ) = m + +extract_parameters(m::ConstMean) = m.c apply_parameters(::ConstMean, θ) = ConstMean(θ) # ---------------- Kernel functions ---------------- + # Kernels and transforms without parameters BaseKernelsWithoutParameters = Union{ ZeroKernel, WhiteKernel, CosineKernel, @@ -69,106 +65,85 @@ AllWithoutParameters = Union{ TransformsWithoutParameters } -""" - extract_parameters(obj) - -Return the free parameters of `obj` wrapped in `ParameterHandling`. -- For kernels: positive or bounded constraints are enforced. -- For kernel compositions (sum, product, tensor, scaled, transformed), returns a tuple or vector of parameter sets. -- For kernels without parameters, returns `nothing`. -- For transforms: returns trainable parameters if any, otherwise `nothing`. -""" -# no paramters +# no parameters extract_parameters(::T) where {T<:AllWithoutParameters} = nothing +apply_parameters(k::T, θ) where {T<:AllWithoutParameters} = k # basekernels (see KernelFunctions.jl src/basekernels) extract_parameters(k::ConstantKernel) = ParameterHandling.positive(k.c) +apply_parameters(::ConstantKernel, θ) = ConstantKernel(; c=only(θ)) + extract_parameters(k::GammaExponentialKernel) = ParameterHandling.bounded(k.γ, 0.0, 2.0) +apply_parameters(::GammaExponentialKernel, θ) = GammaExponentialKernel(; γ=only(θ)) + extract_parameters(k::FBMKernel) = ParameterHandling.bounded(k.h, 0.0, 1.0) +apply_parameters(::FBMKernel, θ) = FBMKernel(; h=only(θ)) + extract_parameters(k::MaternKernel) = ParameterHandling.positive(k.ν) +apply_parameters(::MaternKernel, θ) = MaternKernel(; ν=only(θ)) + extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(k.r) +apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=θ) + extract_parameters(k::LinearKernel) = ParameterHandling.positive(k.c) +apply_parameters(::LinearKernel, θ) = LinearKernel(; c=only(θ)) + extract_parameters(k::PolynomialKernel) = ParameterHandling.positive(k.c) +apply_parameters(::PolynomialKernel, θ) = PolynomialKernel(; c=only(θ)) + extract_parameters(k::RationalKernel) = ParameterHandling.positive(k.α) +apply_parameters(::RationalKernel, θ) = RationalKernel(; α=only(θ)) + extract_parameters(k::RationalQuadraticKernel) = ParameterHandling.positive(k.α) +apply_parameters(::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=only(θ)) + extract_parameters(k::GammaRationalKernel) = ( ParameterHandling.positive(k.α), ParameterHandling.bounded(k.γ, 0.0, 2.0) ) +apply_parameters(::GammaRationalKernel, θ) = GammaRationalKernel(; α=only(θ[1]), γ=only(θ[2])) # kernels (see KernelFunctions.jl src/kernels) # TODO: NeuralKernelNetwork not implemented extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels) -extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) -extract_parameters(k::KernelTensorProduct) = map(extract_parameters, k.kernels) -extract_parameters(k::NormalizedKernel) = extract_parameters(k.kernel) -extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHandling.positive(only(k.σ²))) -extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) - -# transform (see KernelFunctions.jl src/transform) -extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) -extract_parameters(t::ChainTransform) = map(extract_parameters, t.transforms) -extract_parameters(t::LinearTransform) = t.A -extract_parameters(t::PeriodicTransform) = ParameterHandling.positive(t.f) -extract_parameters(t::ScaleTransform) = 
ParameterHandling.positive(t.s) - -""" - apply_parameters(obj, θ) - -Return a new object with parameters `θ` applied. -- Works for kernels, compositions, and transforms. -- For objects without parameters, returns the object unchanged. -- For compositions, expects a tuple or vector of parameter sets matching the structure. -""" -# no parameters -apply_parameters(k::T, θ) where {T<:AllWithoutParameters} = k - -# basekernels -apply_parameters(::ConstantKernel, θ) = ConstantKernel(; c=only(θ)) -apply_parameters(::GammaExponentialKernel, θ) = GammaExponentialKernel(; γ=only(θ)) -apply_parameters(::FBMKernel, θ) = FBMKernel(; h=only(θ)) -apply_parameters(::MaternKernel, θ) = MaternKernel(; ν=only(θ)) -apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=θ) -apply_parameters(::LinearKernel, θ) = LinearKernel(; c=only(θ)) -apply_parameters(::PolynomialKernel, θ) = PolynomialKernel(; c=only(θ)) -apply_parameters(::RationalKernel, θ) = RationalKernel(; α=only(θ)) -apply_parameters(::RationalQuadraticKernel, θ) = RationalQuadraticKernel(; α=only(θ)) -apply_parameters(::GammaRationalKernel, θ) = GammaRationalKernel(; α=only(θ[1]), γ=only(θ[2])) - -# kernels -# TODO: NeuralKernelNetwork not implemented apply_parameters(k::KernelProduct, θ) = KernelProduct(map(apply_parameters, k.kernels, θ)) + +extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) + +extract_parameters(k::KernelTensorProduct) = map(extract_parameters, k.kernels) apply_parameters(k::KernelTensorProduct, θ) = KernelTensorProduct(map(apply_parameters, k.kernels, θ)) + +extract_parameters(k::NormalizedKernel) = extract_parameters(k.kernel) apply_parameters(k::NormalizedKernel, θ) = NormalizedKernel(apply_parameters(k.kernel, θ)) + +extract_parameters(k::ScaledKernel) = (extract_parameters(k.kernel), ParameterHandling.positive(only(k.σ²))) apply_parameters(k::ScaledKernel, θ) = ScaledKernel(apply_parameters(k.kernel, θ[1]), θ[2]) + +extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) apply_parameters(k::TransformedKernel, θ) = TransformedKernel( apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2]) ) -# transform +# transform (see KernelFunctions.jl src/transform) +extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) apply_parameters(::ARDTransform, θ) = ARDTransform(θ) + +extract_parameters(t::ChainTransform) = map(extract_parameters, t.transforms) apply_parameters(t::ChainTransform, θ) = ChainTransform(map(apply_parameters, t.transforms, θ)) + +extract_parameters(t::LinearTransform) = t.A apply_parameters(::LinearTransform, θ) = LinearTransform(θ) + +extract_parameters(t::PeriodicTransform) = ParameterHandling.positive(t.f) apply_parameters(::PeriodicTransform, θ) = PeriodicTransform(θ) + +extract_parameters(t::ScaleTransform) = ParameterHandling.positive(t.s) apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ) # ---------------- Gaussian Processes ---------------- -""" - extract_parameters(f::GP) - -Return the free parameters of a GP as a tuple: `(mean_params, kernel_params)`. 
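For reference, the nested parameter structure returned for a GP mirrors `(mean, kernel)`; a small sketch with a hypothetical kernel:

```julia
θ = extract_parameters(GP(1.0, 2.0 * SqExponentialKernel()))
# θ[1] -> 1.0                        constant of the ConstMean
# θ[2] -> (nothing, positive(2.0))   ScaledKernel: the base kernel has no parameters,
#                                    the variance is a ParameterHandling.positive value
```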
-""" extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) - -""" - apply_parameters(f::GP, θ) - -Return a new GP with parameters `θ` applied: -- `θ[1]` → mean parameters -- `θ[2]` → kernel parameters -""" apply_parameters(f::GP, θ) = GP( apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2]) @@ -185,31 +160,10 @@ struct NoisyGP{T<:GP,Tn<:Real} end (gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) - -""" - with_gaussian_noise(gp::GP, obs_noise::Real) - -Wrap a GP with Gaussian observation noise. -""" with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) -""" - extract_parameters(f::NoisyGP) - -Return the free parameters of a noisy GP: -- `(gp_params, obs_noise_param)` -- Observation noise is constrained positive using `ParameterHandling`. -""" extract_parameters(f::NoisyGP) = ( extract_parameters(f.gp), ParameterHandling.positive(f.obs_noise, exp, 1e-6) ) - -""" - apply_parameters(f::NoisyGP, θ) - -Return a new noisy GP with parameters `θ` applied: -- `θ[1]` → GP parameters -- `θ[2]` → observation noise -""" apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) \ No newline at end of file diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index acec3e7c0..97f06b70f 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -1,25 +1,18 @@ """ - AbstractInputTransform +Input/output transformations for datasets. -Abstract type for input transformations used to describe how input features (columns of a DataFrame) should be -preprocessed before fitting a model (e.g. no transform, z-score standardization). -""" -abstract type AbstractInputTransform end +- `AbstractInputTransform` / `AbstractOutputTransform`: base types for input and output preprocessing. +- `DataStandardization`: holds the chosen input and output transformations. +- `build_datatransform(data, input/output, transform)`: returns functions that apply (and, for outputs, invert) the transformations to a `DataFrame`. +Predefined transforms include: + - `NoInputTransform` / `NoOutputTransform`: no change. + - `ZScoreInputTransform` / `ZScoreOutputTransform`: standardize to zero mean, unit variance. """ - AbstractOutputTransform -Abstract type for output transformations used to describe how model output (columns of a DataFrame) should be -preprocessed before fitting a model (e.g. no transform, z-score standardization). -""" +abstract type AbstractInputTransform end abstract type AbstractOutputTransform end -""" - DataStandardization(input::AbstractInputTransform, output::AbstractOutputTransform) - -Container that holds the input and output transformation strategies to be applied -to a dataset. -""" struct DataStandardization input_transform::AbstractInputTransform output_transform::AbstractOutputTransform @@ -29,46 +22,20 @@ DataStandardization() = DataStandardization(NoInputTransform(), NoOutputTransfor # ---------------- Input transforms ---------------- -""" - NoInputTransform(input) - -No transformation is applied to the specified input columns. -""" struct NoInputTransform <: AbstractInputTransform end - -""" - ZScoreInputTransform(input) - -Applies z-score standardization (mean 0, variance 1) to the specified input columns. -""" - struct ZScoreInputTransform <: AbstractInputTransform end # ---------------- Output transforms ---------------- -""" - NoOutputTransform(output) - -No transformation is applied to the specified output column. 
-""" struct NoOutputTransform <: AbstractOutputTransform end - -""" - ZScoreOutputTransform(output) - -Applies z-score standardization (mean 0, variance 1) to the specified output column. -Provides both forward (`f`) and inverse (`f⁻¹`) transformations. -""" struct ZScoreOutputTransform <: AbstractOutputTransform end # ---------------- Builders ---------------- """ - build_datatransform(data::DataFrame, transform::AbstractInputTransform) +build_datatransform(data, input/output, transform) -Builds a transformation function `f(df::DataFrame) -> Array` that converts -the specified input columns of `df` into an array, optionally applying a -standardization transform. +Returns a function (or pair of functions for outputs) that applies the specified transformation to the dataset. """ function build_datatransform( data::DataFrame, @@ -105,14 +72,6 @@ build_datatransform( transform::ZScoreInputTransform ) = build_datatransform(data, names(input), transform) -""" - build_datatransform(data::DataFrame, transform::AbstractOutputTransform) - -Builds a tuple `(f, f⁻¹)` of transformation functions for the specified output column: - -- `f(df)` applies the output transformation to data. -- `f⁻¹(Y)` reverses the transformation for predictions. -""" function build_datatransform( data::DataFrame, output::Symbol, From 5c78fc6740916ae74007e5c79a76fa4ebc0b74bd Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 2 Sep 2025 17:57:10 +0200 Subject: [PATCH 054/117] Add DataTransforms for in- and output transformation --- src/models/gp/standardization.jl | 178 +++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 55 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index 97f06b70f..334c7a1ec 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -2,7 +2,7 @@ Input/output transformations for datasets. - `AbstractInputTransform` / `AbstractOutputTransform`: base types for input and output preprocessing. -- `DataStandardization`: holds the chosen input and output transformations. +- `DataTransforms`: holds the chosen input and output transformations. - `build_datatransform(data, input/output, transform)`: returns functions that apply (and, for outputs, invert) the transformations to a `DataFrame`. 
Predefined transforms include: @@ -13,36 +13,53 @@ Predefined transforms include: abstract type AbstractInputTransform end abstract type AbstractOutputTransform end -struct DataStandardization - input_transform::AbstractInputTransform - output_transform::AbstractOutputTransform -end - -DataStandardization() = DataStandardization(NoInputTransform(), NoOutputTransform()) - # ---------------- Input transforms ---------------- - struct NoInputTransform <: AbstractInputTransform end struct ZScoreInputTransform <: AbstractInputTransform end +struct UnitRangeInputTransform <: AbstractInputTransform end +struct SNSInputTransform <: AbstractInputTransform end # ---------------- Output transforms ---------------- - struct NoOutputTransform <: AbstractOutputTransform end struct ZScoreOutputTransform <: AbstractOutputTransform end +struct UnitRangeOutputTransform <: AbstractOutputTransform end -# ---------------- Builders ---------------- +# ---------------- Struct for bundled transforms ---------------- +struct DataTransforms + fᵢ::Function + fₒ::Function + fₒ⁻¹::Function +end +# ---------------- Constructor ---------------- +function DataTransforms( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, + output::Symbol, + input_transform::AbstractInputTransform, + output_transform::AbstractOutputTransform +) + fᵢ = build_datatransform(data, input, input_transform) + fₒ, fₒ⁻¹ = build_datatransform(data, output, output_transform) + return DataTransforms(fᵢ, fₒ, fₒ⁻¹) +end + +# ---------------- Transform builders ---------------- """ build_datatransform(data, input/output, transform) -Returns a function (or pair of functions for outputs) that applies the specified transformation to the dataset. +Returns a function (or pair of functions for outputs) that applies the specified transformation to a dataframe. 
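A minimal usage sketch (illustrative only, not part of the patch; the DataFrame and the column names :x1, :x2, :y are assumptions) showing how the bundled `DataTransforms` functions are meant to be applied:

    df = DataFrame(x1 = randn(20), x2 = randn(20), y = randn(20))
    dt = DataTransforms(df, [:x1, :x2], :y, ZScoreInputTransform(), ZScoreOutputTransform())
    x = dt.fᵢ(df)     # standardized inputs as RowVecs (a plain Vector for a single input column)
    y = dt.fₒ(df)     # standardized outputs
    ŷ = dt.fₒ⁻¹(y)    # predictions mapped back to the original output scale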
""" +# ---------------- Input ---------------- +# No input transformation function build_datatransform( - data::DataFrame, + ::DataFrame, input::Union{Symbol, Vector{<:Symbol}}, - transform::NoInputTransform + ::NoInputTransform ) - f(df::DataFrame) = _dataframe_to_array(df, input) + f(df::DataFrame) = to_gp_format( + dataframe_to_array(df, input) + ) return f end @@ -52,17 +69,23 @@ build_datatransform( transform::NoInputTransform ) = build_datatransform(data, names(input), transform) + # ZScore input transformation function build_datatransform( data::DataFrame, input::Union{Symbol, Vector{<:Symbol}}, - transform::ZScoreInputTransform + ::ZScoreInputTransform ) - input_array = _dataframe_to_array(data, input) - zscore_transform = fit(ZScoreTransform, input_array; dims=1) - f(df::DataFrame) = StatsBase.transform( + zscore_transform = fit( + ZScoreTransform, + dataframe_to_array(data, input); + dims=1 + ) + f(df::DataFrame) = to_gp_format( + StatsBase.transform( zscore_transform, - _dataframe_to_array(df, input) + dataframe_to_array(df, input) ) + ) return f end @@ -72,62 +95,107 @@ build_datatransform( transform::ZScoreInputTransform ) = build_datatransform(data, names(input), transform) +# UnitRange input transformation function build_datatransform( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}}, + ::UnitRangeInputTransform +) + unitrange_transform = fit( + UnitRangeTransform, + dataframe_to_array(data, input); + dims=1 + ) + f(df::DataFrame) = to_gp_format( + StatsBase.transform( + unitrange_transform, + dataframe_to_array(df, input) + ) + ) + return f +end + +build_datatransform( data::DataFrame, + input::Union{UQInput, Vector{<:UQInput}}, + transform::UnitRangeInputTransform + ) = build_datatransform(data, names(input), transform) + +# SNS input transform +function build_datatransform( + ::DataFrame, + input::Union{UQInput, Vector{<:UQInput}}, + ::SNSInputTransform +) + function f(df::DataFrame) + df_copy = copy(df) + to_standard_normal_space!(input, df_copy) + return to_gp_format( + dataframe_to_array(df_copy, names(input)) + ) + end + return f +end + +# ---------------- Output ---------------- +# No output transformation +function build_datatransform( + ::DataFrame, output::Symbol, - transform::NoOutputTransform + ::NoOutputTransform ) - f(df::DataFrame) = _dataframe_to_array(df, output) + f(df::DataFrame) = to_gp_format( + dataframe_to_array(df, output) + ) f⁻¹(Y::AbstractArray) = Y return (f, f⁻¹) end +# ZScore output transformation function build_datatransform( data::DataFrame, output::Symbol, - transform::ZScoreOutputTransform + ::ZScoreOutputTransform ) - output_array = _dataframe_to_array(data, output) # will fail if Y is not an array - zscore_transform = fit(ZScoreTransform, output_array; dims=1) - f(df::DataFrame) = StatsBase.transform( + zscore_transform = fit( + ZScoreTransform, + dataframe_to_array(data, output); + dims=1 + ) + f(df::DataFrame) = to_gp_format( + StatsBase.transform( zscore_transform, - _dataframe_to_array(df, output) + dataframe_to_array(df, output) ) + ) f⁻¹(Y::AbstractArray) = StatsBase.reconstruct(zscore_transform, Y) return return (f, f⁻¹) end -function build_datatransforms( - data::DataFrame, - input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, +# UnitRange output transformation +function build_datatransform( + data::DataFrame, output::Symbol, - ds::DataStandardization + ::UnitRangeOutputTransform ) - fᵢ = build_datatransform(data, input, ds.input_transform) - fₒ, fₒ⁻¹ = build_datatransform(data, output, 
ds.output_transform) - return (fᵢ, fₒ, fₒ⁻¹) + unitrange_transform = fit( + UnitRangeTransform, + dataframe_to_array(data, output); + dims=1 + ) + f(df::DataFrame) = to_gp_format( + StatsBase.transform( + unitrange_transform, + dataframe_to_array(df, output) + ) + ) + f⁻¹(Y::AbstractArray) = StatsBase.reconstruct(unitrange_transform, Y) + return return (f, f⁻¹) end - -# ### Standard normal transform -# struct UQInputTransform <: AbstractInputTransform -# uqinput::Union{UQInput, Vector{<:UQInput}} -# end - -# function (transform::UQInputTransform)(df::DataFrame) -# df_copy = copy(df) -# uqinput_names = names(transform.uqinput) -# to_standard_normal_space!(transform.uqinput, df_copy) -# # X is a Matrix for multiple inputs, else it is a Vector -# X = _dataframe_to_array(df_copy, uqinput_names) -# return X -# end - # ---------------- Utility ---------------- +to_gp_format(x::Vector) = x +to_gp_format(x::Matrix) = RowVecs(x) -_dataframe_to_array(df::DataFrame, name::Symbol) = df[:, name] - -function _dataframe_to_array(df::DataFrame, names::Vector{<:Symbol}) - length(names) == 1 ? x = _dataframe_to_array(df, only(names)) : x = RowVecs(Matrix(df[:, names])) - return x -end \ No newline at end of file +dataframe_to_array(df::DataFrame, name::Symbol) = df[:, name] +dataframe_to_array(df::DataFrame, names::Vector{<:Symbol}) = length(names) == 1 ? x = dataframe_to_array(df, only(names)) : x = Matrix(df[:, names]) \ No newline at end of file From bc773310aede317f1029625e22ba017899e84a15 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 2 Sep 2025 17:57:59 +0200 Subject: [PATCH 055/117] Change to single DataTransforms struct to handle transformations --- src/models/gp/gaussianprocess.jl | 216 ++++++------------------------- 1 file changed, 43 insertions(+), 173 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 4ef688a21..b7c66e16f 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -2,215 +2,85 @@ struct GaussianProcess <: UQModel gp::AbstractGPs.PosteriorGP input::Union{Symbol, Vector{Symbol}} output::Symbol - input_transform::Function - output_transform::Function + data_transforms::DataTransforms end -# Build from Dataframe +# ---------------- Build from DataFrame ---------------- function GaussianProcess( gp::Union{AbstractGPs.GP, NoisyGP}, data::DataFrame, - output::Symbol, - instandard::StandardizeInput, - outstandard::StandardizeOutput, - optimization::AbstractHyperparameterOptimization -) # should we use keyword args? - input = propertynames(data[:, Not(output)]) - datatransformer = DataTransform( - data, instandard, input, outstandard, output + output::Symbol; + input_transform::AbstractInputTransform=ZScoreInputTransform(), + output_transform::AbstractOutputTransform=ZScoreOutputTransform(), + optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() +) + input = propertynames(data[:, Not(output)]) # Is this always the case? 
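    # Note (added for clarity): this line assumes every column of `data` except `output`
    # is a model input; any extra bookkeeping columns in the DataFrame would be picked up
    # as inputs too, so the frame should contain only the inputs and the output.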
+ + # build in- and output transforms + dts = DataTransforms( + data, input, output, + input_transform, + output_transform ) - x = datatransformer.input(data) - y = datatransformer.output(data) + + # transform data + x = dts.fᵢ(data) + y = dts.fₒ(data) + + # build posterior gp optimized_gp = optimize_hyperparameters(gp, x, y, optimization) posterior_gp = posterior(optimized_gp(x), y) return GaussianProcess( posterior_gp, input, output, - datatransformer - ) -end - -function GaussianProcess( - gp::AbstractGPs.GP, - data::DataFrame, - output::Symbol, - instandard::StandardizeInput, - optimization::AbstractHyperparameterOptimization -) # should we use keyword args? - return GaussianProcess( - gp, - data, - output, - instandard, - StandardizeOutput(false), - optimization - ) -end - -function GaussianProcess( - gp::AbstractGPs.GP, - data::DataFrame, - output::Symbol, - outstandard::StandardizeOutput, - optimization::AbstractHyperparameterOptimization -) # should we use keyword args? - return GaussianProcess( - gp, - data, - output, - StandardizeInput(false), - outstandard, - optimization - ) -end - -function GaussianProcess( - gp::AbstractGPs.GP, - data::DataFrame, - output::Symbol, - optimization::AbstractHyperparameterOptimization -) # should we use keyword args? - return GaussianProcess( - gp, - data, - output, - StandardizeInput(false), - StandardizeOutput(false), - optimization - ) -end - -function GaussianProcess( - gp::AbstractGPs.GP, - data::DataFrame, - output::Symbol -) # should we use keyword args? - return GaussianProcess( - gp, - data, - output, - StandardizeInput(false), - StandardizeOutput(false), - NoOptimization() + dts ) end -# Build with UQmodel +# ---------------- Build from UQModel ---------------- function GaussianProcess( gp::Union{AbstractGPs.GP, NoisyGP}, input::Union{UQInput, Vector{<:UQInput}}, model::Union{UQModel, Vector{<:UQModel}}, output::Symbol, - experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, - instandard::StandardizeInput, - outstandard::StandardizeOutput, - optimization::AbstractHyperparameterOptimization + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}; + input_transform::AbstractInputTransform=ZScoreInputTransform(), + output_transform::AbstractOutputTransform=ZScoreOutputTransform(), + optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() ) - data = sample(input, experimentaldesign) # need to be able to pass experimental design + # build DataFrame + data = sample(input, experimentaldesign) evaluate!(model, data) - datatransformer = DataTransform( - data, instandard, input, outstandard, output + # build in- and output transforms + dts = DataTransforms( + data, input, output, + input_transform, + output_transform ) - x = datatransformer.input(data) - y = datatransformer.output(data) + + # transform data + x = dts.fᵢ(data) + y = dts.fₒ(data) + + # build posterior gp optimized_gp = optimize_hyperparameters(gp, x, y, optimization) posterior_gp = posterior(optimized_gp(x), y) + return GaussianProcess( posterior_gp, names(input), output, - datatransformer - ) -end - -function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, - input::Union{UQInput, Vector{<:UQInput}}, - model::Union{UQModel, Vector{<:UQModel}}, - output::Symbol, - experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, - instandard::StandardizeInput, - optimization::AbstractHyperparameterOptimization -) - return GaussianProcess( - gp, - input, - model, - output, - experimentaldesign, - 
instandard, - StandardizeOutput(false), - optimization - ) -end - -function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, - input::Union{UQInput, Vector{<:UQInput}}, - model::Union{UQModel, Vector{<:UQModel}}, - output::Symbol, - experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, - outstandard::StandardizeOutput, - optimization::AbstractHyperparameterOptimization -) - return GaussianProcess( - gp, - input, - model, - output, - experimentaldesign, - StandardizeInput(false), - outstandard, - optimization - ) -end - -function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, - input::Union{UQInput, Vector{<:UQInput}}, - model::Union{UQModel, Vector{<:UQModel}}, - output::Symbol, - experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}, - optimization::AbstractHyperparameterOptimization -) - return GaussianProcess( - gp, - input, - model, - output, - experimentaldesign, - StandardizeInput(false), - StandardizeOutput(false), - optimization - ) -end - -function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, - input::Union{UQInput, Vector{<:UQInput}}, - model::Union{UQModel, Vector{<:UQModel}}, - output::Symbol, - experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments} -) - return GaussianProcess( - gp, - input, - model, - output, - experimentaldesign, - StandardizeInput(false), - StandardizeOutput(false), - NoOptimization() + dts ) end # what should this calculate? Calculates only mean for now function evaluate!(gp::GaussianProcess, data::DataFrame) - x = gp.datatransformer.input(data) + x = gp.data_transforms.fᵢ(data) y = mean(gp.gp(x)) - data[!, gp.output] = inverse_transform(y, gp.datatransformer.output) + data[!, gp.output] = gp.data_transforms.fₒ⁻¹(y) # applying inverse transform to output return nothing end \ No newline at end of file From f5c40337a6bd658e08c755a3d190002889a02c3a Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 2 Sep 2025 17:58:45 +0200 Subject: [PATCH 056/117] Export all datatransformations for gaussian processes --- src/UncertaintyQuantification.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 2b6af319e..c79f440ca 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -84,6 +84,8 @@ export AbstractQuasiMonteCarlo export AbstractSimulation export Copula export DeterministicUQInput +export NoInputTransform, ZScoreInputTransform, UnitRangeInputTransform, SNSInputTransform +export NoOutputTransform, ZScoreOutputTransform, UnitRangeOutputTransform export RandomUQInput export UQInput export UQModel @@ -91,7 +93,6 @@ export UQModel # Structs export AdvancedLineSampling export EmpiricalDistribution -export ExperimentalDesign # Currently used for gps export BackwardFiniteDifferences export BoxBehnken export CentralComposite @@ -143,8 +144,6 @@ export ResponseSurface export ShinozukaDeodatis export SobolSampling export Solver -export StandardizeInput -export StandardizeOutput export SpectralRepresentation export StochasticProcessModel export SubSetInfinity From 337b326f8441ee3c88917d0ee73bb51031af8043 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 2 Sep 2025 17:59:17 +0200 Subject: [PATCH 057/117] Add types of inputs --- src/models/gp/hyperparametertuning.jl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index d5edc4ba9..4b42f55e0 100644 --- 
a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -11,31 +11,31 @@ end MaximumLikelihoodEstimation() = MaximumLikelihoodEstimation( Optim.LBFGS(), - Optim.Options(; iterations=1000, show_trace=false) + Optim.Options(; iterations=10, show_trace=false) ) function optimize_hyperparameters( gp::Union{AbstractGPs.GP, NoisyGP}, - x, - y, - opt::NoOptimization + ::Union{RowVecs{<:Real}, Vector{<:Real}}, + ::Vector{<:Real}, + ::NoOptimization ) return gp end objective( f::Union{AbstractGPs.GP, NoisyGP}, - x, - y, - mle::MaximumLikelihoodEstimation + x::Union{RowVecs{<:Real}, Vector{<:Real}}, + y::Vector{<:Real}, + ::MaximumLikelihoodEstimation ) = -logpdf(f(x), y) function optimize_hyperparameters( gp::Union{AbstractGPs.GP, NoisyGP}, - x, - y, + x::Union{RowVecs{<:Real}, Vector{<:Real}}, + y::Vector{<:Real}, mle::MaximumLikelihoodEstimation -) #!TYPES +) model, θ₀ = parameterize(gp) θ₀_flat, unflatten = ParameterHandling.flatten(θ₀) @@ -43,7 +43,7 @@ function optimize_hyperparameters( θ -> objective(model(unflatten(θ)), x, y, mle), θ₀_flat, mle.optimizer, mle.options; - autodiff=AutoZygote() + autodiff= AutoZygote() ) return model(unflatten(result.minimizer)) end \ No newline at end of file From 3d7de71231bca675493bdfc3af05711a619a3247 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 2 Sep 2025 18:00:41 +0200 Subject: [PATCH 058/117] Preliminary demo --- demo/metamodels/gaussianprocess.jl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl index 4a5b8ee58..fe6509b99 100644 --- a/demo/metamodels/gaussianprocess.jl +++ b/demo/metamodels/gaussianprocess.jl @@ -1,32 +1,28 @@ using UncertaintyQuantification using AbstractGPs using Random +using Optim + # Setup Himmelblau example x = RandomVariable.(Uniform(-5, 5), [:x1, :x2]) himmelblau = Model( df -> (df.x1 .^ 2 .+ df.x2 .- 11) .^ 2 .+ (df.x1 .+ df.x2 .^ 2 .- 7) .^ 2, :y ) -design = FullFactorial([8, 8]) +design = LatinHypercubeSampling(100) training_data = sample(x, design) evaluate!(himmelblau, training_data) -# This will be used for proper initial guesses for the parameters of the GP -mean_data = mean(training_data[!, :y]) -std_data = std(training_data[!, :y]) - # Setup the GP -# Note: If we do not initialize the parameters here properly the optimization will fail. Standardization should help with that. 
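# Note (added for clarity, refers to the lines below): σ² is the fixed Gaussian observation
# noise that with_gaussian_noise wraps around the GP prior, and ARDTransform supplies one
# scaling factor (an inverse lengthscale) per input dimension of the Himmelblau model.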
σ² = 1e-5 -kernel = SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]) +kernel = SqExponentialKernel() ∘ ARDTransform([0.5, 0.5]) gp = with_gaussian_noise(GP(0.0, kernel), σ²) -optimizer = MLE() -# TODO: StandardizeInput breaks currently due to -Inf and Inf values from to_standard_normal_space!() -# TODO: Optimization is extremely unstable -# TODO: Not all kernels have a extract_parameters and apply_parameters function +optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; iterations=1000, show_trace=false)) +# optimizer = MaximumLikelihoodEstimation(Optim.LBFGS(), Optim.Options(; iterations=10, show_trace=false)) + gpr = GaussianProcess( - gp, x, himmelblau, :y, design, StandardizeOutput(), MLE() + gp, x, himmelblau, :y, design; input_transform=UnitRangeInputTransform(), output_transform=UnitRangeOutputTransform(), optimization=optimizer ) test_data = sample(x, 1000) @@ -40,6 +36,7 @@ println("MSE is: $mse") using Plots using DataFrames +# SNSInputTransform will crash the plotting routine on -5 and 5 values a = range(-5, 5; length=1000) b = range(5, -5; length=1000) himmelblau_f(x1, x2) = (x1^2 + x2 - 11)^2 + (x1 + x2^2 - 7)^2 From 548cd52e62f9931ffbad926e5a2252536761a30c Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 3 Sep 2025 11:34:47 +0200 Subject: [PATCH 059/117] Refactor data standardization pipeline --- src/models/gp/standardization.jl | 73 ++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index 334c7a1ec..0dd4527e9 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -9,39 +9,40 @@ Predefined transforms include: - `NoInputTransform` / `NoOutputTransform`: no change. - `ZScoreInputTransform` / `ZScoreOutputTransform`: standardize to zero mean, unit variance. 
""" +abstract type AbstractDataTransform end -abstract type AbstractInputTransform end -abstract type AbstractOutputTransform end +# ---------------- Input/Output transforms ---------------- +struct IdentityTransform <: AbstractDataTransform end +struct ZScoreTransform <: AbstractDataTransform end +struct UnitRangeTransform <: AbstractDataTransform end +struct StandardNormalTransform <: AbstractDataTransform end -# ---------------- Input transforms ---------------- -struct NoInputTransform <: AbstractInputTransform end -struct ZScoreInputTransform <: AbstractInputTransform end -struct UnitRangeInputTransform <: AbstractInputTransform end -struct SNSInputTransform <: AbstractInputTransform end +struct InputTransform{T <: AbstractDataTransform} end +InputTransform(::Type{T}) where {T <: AbstractDataTransform} = InputTransform{T}() +InputTransform(x::AbstractDataTransform) = InputTransform(typeof(x)) -# ---------------- Output transforms ---------------- -struct NoOutputTransform <: AbstractOutputTransform end -struct ZScoreOutputTransform <: AbstractOutputTransform end -struct UnitRangeOutputTransform <: AbstractOutputTransform end +struct OutputTransform{T <: AbstractDataTransform} end +OutputTransform(::Type{T}) where {T <: AbstractDataTransform} = OutputTransform{T}() +OutputTransform(x::AbstractDataTransform) = OutputTransform(typeof(x)) -# ---------------- Struct for bundled transforms ---------------- -struct DataTransforms +# ---------------- Struct for bundled transform functions ---------------- +struct DataStandardizer fᵢ::Function fₒ::Function fₒ⁻¹::Function end # ---------------- Constructor ---------------- -function DataTransforms( +function DataStandardizer( data::DataFrame, input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, output::Symbol, - input_transform::AbstractInputTransform, - output_transform::AbstractOutputTransform + input_transform::InputTransform, + output_transform::OutputTransform ) fᵢ = build_datatransform(data, input, input_transform) fₒ, fₒ⁻¹ = build_datatransform(data, output, output_transform) - return DataTransforms(fᵢ, fₒ, fₒ⁻¹) + return DataStandardizer(fᵢ, fₒ, fₒ⁻¹) end # ---------------- Transform builders ---------------- @@ -55,7 +56,7 @@ Returns a function (or pair of functions for outputs) that applies the specified function build_datatransform( ::DataFrame, input::Union{Symbol, Vector{<:Symbol}}, - ::NoInputTransform + ::InputTransform{IdentityTransform} ) f(df::DataFrame) = to_gp_format( dataframe_to_array(df, input) @@ -66,17 +67,17 @@ end build_datatransform( data::DataFrame, input::Union{UQInput, Vector{<:UQInput}}, - transform::NoInputTransform + transform::InputTransform{IdentityTransform} ) = build_datatransform(data, names(input), transform) # ZScore input transformation function build_datatransform( data::DataFrame, input::Union{Symbol, Vector{<:Symbol}}, - ::ZScoreInputTransform + ::InputTransform{ZScoreTransform} ) zscore_transform = fit( - ZScoreTransform, + StatsBase.ZScoreTransform, dataframe_to_array(data, input); dims=1 ) @@ -92,17 +93,17 @@ end build_datatransform( data::DataFrame, input::Union{UQInput, Vector{<:UQInput}}, - transform::ZScoreInputTransform + transform::InputTransform{ZScoreTransform} ) = build_datatransform(data, names(input), transform) # UnitRange input transformation function build_datatransform( data::DataFrame, input::Union{Symbol, Vector{<:Symbol}}, - ::UnitRangeInputTransform + ::InputTransform{UnitRangeTransform} ) unitrange_transform = fit( - UnitRangeTransform, + 
StatsBase.UnitRangeTransform, dataframe_to_array(data, input); dims=1 ) @@ -118,14 +119,14 @@ end build_datatransform( data::DataFrame, input::Union{UQInput, Vector{<:UQInput}}, - transform::UnitRangeInputTransform + transform::InputTransform{UnitRangeTransform} ) = build_datatransform(data, names(input), transform) # SNS input transform function build_datatransform( ::DataFrame, input::Union{UQInput, Vector{<:UQInput}}, - ::SNSInputTransform + ::InputTransform{StandardNormalTransform} ) function f(df::DataFrame) df_copy = copy(df) @@ -142,7 +143,7 @@ end function build_datatransform( ::DataFrame, output::Symbol, - ::NoOutputTransform + ::OutputTransform{IdentityTransform} ) f(df::DataFrame) = to_gp_format( dataframe_to_array(df, output) @@ -155,10 +156,10 @@ end function build_datatransform( data::DataFrame, output::Symbol, - ::ZScoreOutputTransform + ::OutputTransform{ZScoreTransform} ) zscore_transform = fit( - ZScoreTransform, + StatsBase.ZScoreTransform, dataframe_to_array(data, output); dims=1 ) @@ -176,10 +177,10 @@ end function build_datatransform( data::DataFrame, output::Symbol, - ::UnitRangeOutputTransform + ::OutputTransform{UnitRangeTransform} ) unitrange_transform = fit( - UnitRangeTransform, + StatsBase.UnitRangeTransform, dataframe_to_array(data, output); dims=1 ) @@ -193,6 +194,16 @@ function build_datatransform( return return (f, f⁻¹) end +function build_datatransform( + ::DataFrame, + ::Symbol, + ::OutputTransform{StandardNormalTransform} +) + throw(ArgumentError( + "StandardNormalTransform is only valid for input transforms." + )) +end + # ---------------- Utility ---------------- to_gp_format(x::Vector) = x to_gp_format(x::Matrix) = RowVecs(x) From 027db46d6e61dcfc57c2c603e7e6bd4f8bcb3190 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 3 Sep 2025 11:35:12 +0200 Subject: [PATCH 060/117] Refactor data standardization pipeline --- src/UncertaintyQuantification.jl | 3 +-- src/models/gp/gaussianprocess.jl | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index c79f440ca..cb818cf8c 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -84,8 +84,6 @@ export AbstractQuasiMonteCarlo export AbstractSimulation export Copula export DeterministicUQInput -export NoInputTransform, ZScoreInputTransform, UnitRangeInputTransform, SNSInputTransform -export NoOutputTransform, ZScoreOutputTransform, UnitRangeOutputTransform export RandomUQInput export UQInput export UQModel @@ -113,6 +111,7 @@ export GaussianProcess export GaussQuadrature export HaltonSampling export HermiteBasis +export IdentityTransform, ZScoreTransform, UnitRangeTransform, StandardNormalTransform export ImportanceSampling export Interval export IntervalVariable diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index b7c66e16f..cfe4ad15d 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -2,7 +2,7 @@ struct GaussianProcess <: UQModel gp::AbstractGPs.PosteriorGP input::Union{Symbol, Vector{Symbol}} output::Symbol - data_transforms::DataTransforms + standardizer::DataStandardizer end # ---------------- Build from DataFrame ---------------- @@ -10,17 +10,17 @@ function GaussianProcess( gp::Union{AbstractGPs.GP, NoisyGP}, data::DataFrame, output::Symbol; - input_transform::AbstractInputTransform=ZScoreInputTransform(), - output_transform::AbstractOutputTransform=ZScoreOutputTransform(), + 
input_transform::AbstractDataTransform=IdentityTransform(), + output_transform::AbstractDataTransform=IdentityTransform(), optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() ) input = propertynames(data[:, Not(output)]) # Is this always the case? # build in- and output transforms - dts = DataTransforms( + dts = DataStandardizer( data, input, output, - input_transform, - output_transform + InputTransform(input_transform), + OutputTransform(output_transform) ) # transform data @@ -45,8 +45,8 @@ function GaussianProcess( model::Union{UQModel, Vector{<:UQModel}}, output::Symbol, experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}; - input_transform::AbstractInputTransform=ZScoreInputTransform(), - output_transform::AbstractOutputTransform=ZScoreOutputTransform(), + input_transform::AbstractDataTransform=IdentityTransform(), + output_transform::AbstractDataTransform=IdentityTransform(), optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() ) # build DataFrame @@ -54,10 +54,10 @@ function GaussianProcess( evaluate!(model, data) # build in- and output transforms - dts = DataTransforms( + dts = DataStandardizer( data, input, output, - input_transform, - output_transform + InputTransform(input_transform), + OutputTransform(output_transform) ) # transform data @@ -78,9 +78,9 @@ end # what should this calculate? Calculates only mean for now function evaluate!(gp::GaussianProcess, data::DataFrame) - x = gp.data_transforms.fᵢ(data) + x = gp.standardizer.fᵢ(data) y = mean(gp.gp(x)) - data[!, gp.output] = gp.data_transforms.fₒ⁻¹(y) # applying inverse transform to output + data[!, gp.output] = gp.standardizer.fₒ⁻¹(y) # applying inverse transform to output return nothing end \ No newline at end of file From b3feed3aab39aeb9f619b20fa94b25d978f6a8c4 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 3 Sep 2025 11:35:32 +0200 Subject: [PATCH 061/117] Preliminary test for refactored data standardization --- demo/metamodels/gaussianprocess.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl index fe6509b99..b658ec5a0 100644 --- a/demo/metamodels/gaussianprocess.jl +++ b/demo/metamodels/gaussianprocess.jl @@ -22,7 +22,7 @@ optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; # optimizer = MaximumLikelihoodEstimation(Optim.LBFGS(), Optim.Options(; iterations=10, show_trace=false)) gpr = GaussianProcess( - gp, x, himmelblau, :y, design; input_transform=UnitRangeInputTransform(), output_transform=UnitRangeOutputTransform(), optimization=optimizer + gp, x, himmelblau, :y, design; input_transform=ZScoreTransform(), output_transform=StandardNormalTransform(), optimization=optimizer ) test_data = sample(x, 1000) From 48758d0e8fbe15186f7c22cada3e60441ca6c7db Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 3 Sep 2025 12:00:12 +0200 Subject: [PATCH 062/117] Preliminary unit test for data standardization --- test/models/gp/datamanipulation.jl | 85 ------------------------------ test/models/gp/standardization.jl | 45 ++++++++++++++++ 2 files changed, 45 insertions(+), 85 deletions(-) delete mode 100644 test/models/gp/datamanipulation.jl create mode 100644 test/models/gp/standardization.jl diff --git a/test/models/gp/datamanipulation.jl b/test/models/gp/datamanipulation.jl deleted file mode 100644 index 3b18759d3..000000000 --- a/test/models/gp/datamanipulation.jl +++ /dev/null @@ -1,85 +0,0 @@ -@testset "GaussianProcessDataManipulation" 
begin - single_input = RandomVariable(Normal(-1, 0.5), :x1) - single_input_vector = [single_input] - multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) - - df_single = sample(single_input, 10) - df_single_vector = sample(single_input_vector, 10) - df_multi = sample(multi_input, 10) - - @testset "InputTransformer" begin - # Check 1D input - single_input_transformer_no = InputTransformer(df_single, names(single_input), false) - single_input_transformer_zsc = InputTransformer(df_single, names(single_input), true) - - @test all(single_input_transformer_no(df_single) .== df_single[:, 1]) - @test all( - single_input_transformer_zsc(df_single) .≈ - (df_single[:, 1] .- mean(df_single[:, 1])) / std(df_single[:, 1]) - ) - - # Check 1D input passed in a Vector - single_input_vector_transformer_no = InputTransformer( - df_single_vector, names(single_input_vector), false - ) - single_input_vector_transformer_zsc = InputTransformer( - df_single_vector, names(single_input_vector), true - ) - - @test all(single_input_vector_transformer_no(df_single_vector) .== df_single_vector[:, 1]) - @test all( - single_input_vector_transformer_zsc(df_single_vector) .≈ - (df_single_vector[:, 1] .- mean(df_single_vector[:, 1])) / std(df_single_vector[:, 1]) - ) - - # Check ND input - multi_input_transformer_no = InputTransformer(df_multi, names(multi_input), false) - multi_input_transformer_zsc = InputTransformer(df_multi, names(multi_input), true) - - df_as_matrix = Matrix(df_multi) - mean_ = mean(df_as_matrix; dims=1) - std_ = std(df_as_matrix; dims=1) - for (i, col) in enumerate(eachcol(df_as_matrix)) - df_as_matrix[:, i] .= (col .- mean_[1, i]) / std_[1, i] - end - - @test all(multi_input_transformer_no(df_multi) .== Matrix(df_multi)) - @test all(multi_input_transformer_zsc(df_multi) .≈ df_as_matrix) - end - - @testset "UQInputTransformer" begin - # Check 1D input - single_input_transformer_no = UQInputTransformer(single_input, false) - single_input_transformer_sns = UQInputTransformer(single_input, true) - - df_copy_sns = copy(df_single) - to_standard_normal_space!(single_input, df_copy_sns) - - @test all(single_input_transformer_no(df_single) .== df_single[:, 1]) - @test all(single_input_transformer_sns(df_single) .== df_copy_sns[:, 1]) - - # Check 1D input passed in a Vector - single_input_vector_transformer_no = UQInputTransformer(single_input_vector, false) - single_input_vector_transformer_sns = UQInputTransformer(single_input_vector, true) - - df_copy_sns = copy(df_single_vector) - to_standard_normal_space!(single_input_vector, df_copy_sns) - - @test all(single_input_transformer_no(df_single_vector) .== df_single_vector[:, 1]) - @test all(single_input_transformer_sns(df_single_vector) .== df_copy_sns[:, 1]) - - # Check ND input - multi_input_transformer_no = UQInputTransformer(multi_input, false) - multi_input_transformer_sns = UQInputTransformer(multi_input, true) - - df_copy_sns = copy(df_multi) - to_standard_normal_space!(multi_input, df_copy_sns) - - @test all(multi_input_transformer_no(df_multi) .== Matrix(df_multi)) - @test all(multi_input_transformer_sns(df_multi) .== Matrix(df_copy_sns)) - end - - @testset "OutputTransformer" begin - #: TODO - end -end diff --git a/test/models/gp/standardization.jl b/test/models/gp/standardization.jl new file mode 100644 index 000000000..dfeb2a6bd --- /dev/null +++ b/test/models/gp/standardization.jl @@ -0,0 +1,45 @@ +@testset "GaussianProcessDataStandardizer" begin + single_input = RandomVariable(Normal(-1, 0.5), :x1) + 
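    # Note (added for clarity): the scalar input above and the three-dimensional input below
    # exercise both input paths of the standardizer, since a single column is handed to the GP
    # as a Vector while several columns become RowVecs.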
multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) + + N = 10 + output = :y + + df_single_in = sample(single_input, N) + df_single_in[!, output] = rand(N) + + df_multi_in = sample(multi_input, N) + df_multi_in[!, output] = rand(N) + + single_input_names = propertynames(df_single_in[:, Not(output)]) + multi_input_names = propertynames(df_multi_in[:, Not(output)]) + + @testset "IdentityTransform" begin + dts_single = DataStandardizer( + df_single_in, single_input_names, output, + InputTransform(IdentityTransform()), + OutputTransform(IdentityTransform()) + ) + + dts_multi = DataStandardizer( + df_multi_in, multi_input_names, output, + InputTransform(IdentityTransform()), + OutputTransform(IdentityTransform()) + ) + + single_in_transformed = dts_single.fᵢ(df_single_in) + multi_in_transformed = dts_single.fᵢ(df_single_in) + end + + @testset "ZScoreTransform" begin + + end + + @testset "UnitRangeTransform" begin + + end + + @testset "StandardNormalTransform" begin + #: TODO + end +end From 581f110a4ec57adbabb9772563969afa01270aac Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 3 Sep 2025 14:10:13 +0200 Subject: [PATCH 063/117] Add inverse output transform for gp posterior variance --- src/models/gp/standardization.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index 0dd4527e9..ac4caa650 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -30,6 +30,7 @@ struct DataStandardizer fᵢ::Function fₒ::Function fₒ⁻¹::Function + var_fₒ⁻¹::Function end # ---------------- Constructor ---------------- @@ -41,8 +42,8 @@ function DataStandardizer( output_transform::OutputTransform ) fᵢ = build_datatransform(data, input, input_transform) - fₒ, fₒ⁻¹ = build_datatransform(data, output, output_transform) - return DataStandardizer(fᵢ, fₒ, fₒ⁻¹) + fₒ, fₒ⁻¹, var_fₒ⁻¹ = build_datatransform(data, output, output_transform) + return DataStandardizer(fᵢ, fₒ, fₒ⁻¹, var_fₒ⁻¹) end # ---------------- Transform builders ---------------- @@ -148,7 +149,8 @@ function build_datatransform( f(df::DataFrame) = to_gp_format( dataframe_to_array(df, output) ) - f⁻¹(Y::AbstractArray) = Y + mean_f⁻¹(Y::AbstractArray) = Y + var_f⁻¹(Y::AbstractArray) = Y return (f, f⁻¹) end @@ -170,7 +172,8 @@ function build_datatransform( ) ) f⁻¹(Y::AbstractArray) = StatsBase.reconstruct(zscore_transform, Y) - return return (f, f⁻¹) + var_f⁻¹(Y::AbstractArray) = only(zscore_transform.scale)^2 * Y + return (f, f⁻¹, var_f⁻¹) end # UnitRange output transformation @@ -191,7 +194,8 @@ function build_datatransform( ) ) f⁻¹(Y::AbstractArray) = StatsBase.reconstruct(unitrange_transform, Y) - return return (f, f⁻¹) + var_f⁻¹(Y::AbstractArray) = only(unitrange_transform.scale)^2 * Y + return (f, f⁻¹, var_f⁻¹) end function build_datatransform( From a984293fe54587d73fe826f064047e1d45f84e1a Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 3 Sep 2025 14:11:02 +0200 Subject: [PATCH 064/117] Add var! and mean_and_var! 
methods for gps --- src/UncertaintyQuantification.jl | 2 ++ src/models/gp/gaussianprocess.jl | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index cb818cf8c..ebee55410 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -163,6 +163,7 @@ export evaluate! export gradient export gradient_in_standard_normal_space export mean +export mean_and_var! export multivariate_indices export periodogram export polynomialchaos @@ -178,6 +179,7 @@ export to_copula_space export to_physical_space! export to_standard_normal_space export to_standard_normal_space! +export var! export with_gaussian_noise include("inputs/empiricaldistribution.jl") diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index cfe4ad15d..1be9b82c9 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -79,8 +79,29 @@ end # what should this calculate? Calculates only mean for now function evaluate!(gp::GaussianProcess, data::DataFrame) x = gp.standardizer.fᵢ(data) - y = mean(gp.gp(x)) + μ = mean(gp.gp(x)) - data[!, gp.output] = gp.standardizer.fₒ⁻¹(y) # applying inverse transform to output + data[!, gp.output] = gp.standardizer.fₒ⁻¹(μ) + return nothing +end + +function var!(gp::GaussianProcess, data::DataFrame) + x = gp.standardizer.fᵢ(data) + σ² = var(gp.gp(x)) + + column_name = Symbol(string(gp.output, "_", "var")) + data[!, column_name] = gp.standardizer.var_fₒ⁻¹(σ²) + return nothing +end + +function mean_and_var!(gp::GaussianProcess, data::DataFrame) + x = gp.standardizer.fᵢ(data) + μ = mean(gp.gp(x)) + σ² = var(gp.gp(x)) + + column_name_mean = Symbol(string(gp.output, "_", "mean")) + column_name_var = Symbol(string(gp.output, "_", "var")) + data[!, column_name_mean] = gp.standardizer.fₒ⁻¹(μ) + data[!, column_name_var] = gp.standardizer.var_fₒ⁻¹(σ²) return nothing end \ No newline at end of file From 0a7ec3cb27b0f7927102897e8b2223f3619412e8 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 3 Sep 2025 20:33:15 +0200 Subject: [PATCH 065/117] Fix wrongly returned output transforms --- src/models/gp/standardization.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index ac4caa650..fd74ba907 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -149,9 +149,9 @@ function build_datatransform( f(df::DataFrame) = to_gp_format( dataframe_to_array(df, output) ) - mean_f⁻¹(Y::AbstractArray) = Y + f⁻¹(Y::AbstractArray) = Y var_f⁻¹(Y::AbstractArray) = Y - return (f, f⁻¹) + return (f, f⁻¹, var_f⁻¹) end # ZScore output transformation From 8e9f339d600a34beb0bc233b49a940069eda9da2 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 3 Sep 2025 20:33:49 +0200 Subject: [PATCH 066/117] Preliminary tests --- test/models/gp/standardization.jl | 167 ++++++++++++++++++++++++++++-- test/runtests.jl | 98 +++++++++--------- 2 files changed, 208 insertions(+), 57 deletions(-) diff --git a/test/models/gp/standardization.jl b/test/models/gp/standardization.jl index dfeb2a6bd..75bb54993 100644 --- a/test/models/gp/standardization.jl +++ b/test/models/gp/standardization.jl @@ -9,34 +9,183 @@ df_single_in[!, output] = rand(N) df_multi_in = sample(multi_input, N) - df_multi_in[!, output] = rand(N) + df_multi_in[!, output] = df_single_in[!, output] 
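    # Note (added for clarity): sharing one output column between the single- and multi-input
    # frames lets the tests below compare both standardizers against identical output values.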
single_input_names = propertynames(df_single_in[:, Not(output)]) multi_input_names = propertynames(df_multi_in[:, Not(output)]) @testset "IdentityTransform" begin - dts_single = DataStandardizer( + # 1D input + dts_single = UncertaintyQuantification.DataStandardizer( df_single_in, single_input_names, output, - InputTransform(IdentityTransform()), - OutputTransform(IdentityTransform()) + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.IdentityTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.IdentityTransform() + ) ) + single_in_transformed = dts_single.fᵢ(df_single_in) + + @test isa(single_in_transformed, Vector) + @test all(single_in_transformed .== df_single_in[:, only(single_input_names)]) - dts_multi = DataStandardizer( + # ND input + dts_multi = UncertaintyQuantification.DataStandardizer( df_multi_in, multi_input_names, output, - InputTransform(IdentityTransform()), - OutputTransform(IdentityTransform()) + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.IdentityTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.IdentityTransform() + ) ) + multi_in_transformed = dts_multi.fᵢ(df_multi_in) - single_in_transformed = dts_single.fᵢ(df_single_in) - multi_in_transformed = dts_single.fᵢ(df_single_in) + @test isa(multi_in_transformed, RowVecs) + + RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) + @test all(RowVecsMatrix .== Matrix(df_multi_in[:, multi_input_names])) + + # Output + out_transformed = dts_single.fₒ(df_single_in) + + # single input and multi input related output transforms should do the same thing + @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) + + # Mean and latent function samples just get identity transformed + @test all(df_single_in[!, output] .== dts_single.fₒ⁻¹(out_transformed)) + @test all(df_multi_in[!, output] .== dts_multi.fₒ⁻¹(out_transformed)) + + # Variance also just get identity transformed + @test all(df_single_in[!, output] .== dts_single.var_fₒ⁻¹(out_transformed)) + @test all(df_multi_in[!, output] .== dts_multi.var_fₒ⁻¹(out_transformed)) end @testset "ZScoreTransform" begin + # 1D input + dts_single = UncertaintyQuantification.DataStandardizer( + df_single_in, single_input_names, output, + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.ZScoreTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.ZScoreTransform() + ) + ) + single_in_transformed = dts_single.fᵢ(df_single_in) + @test isa(single_in_transformed, Vector) + + μ = mean(df_single_in[:, only(single_input_names)]) + σ = std(df_single_in[:, only(single_input_names)]) + manually_scaled = (df_single_in[:, only(single_input_names)] .- μ) ./ σ + @test all(single_in_transformed .≈ manually_scaled) + + # ND input + dts_multi = UncertaintyQuantification.DataStandardizer( + df_multi_in, multi_input_names, output, + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.ZScoreTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.ZScoreTransform() + ) + ) + multi_in_transformed = dts_multi.fᵢ(df_multi_in) + + @test isa(multi_in_transformed, RowVecs) + + RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) + μ = mean(Matrix(df_multi_in[:, multi_input_names]), dims=1) + σ = std(Matrix(df_multi_in[:, multi_input_names]), dims=1) + + manually_scaled = (Matrix(df_multi_in[:, multi_input_names]) .- μ) ./ σ + @test all(RowVecsMatrix .≈ manually_scaled) + + # 
Output + out_transformed = dts_single.fₒ(df_single_in) + + # single input and multi input related output transforms should do the same thing + @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) + + # Mean and latent function samples get rescaled and shifted + @test all(df_single_in[!, output] .≈ dts_single.fₒ⁻¹(out_transformed)) + @test all(df_multi_in[!, output] .≈ dts_multi.fₒ⁻¹(out_transformed)) + + # Variance gets multiplied by squared standard deviation used in ZScoreTransform + # Note: This usually gets applied to the GP posterior variance, here + # we just scale out_transformed back to verify it does the right thing + σ = std(df_single_in[:, output]) + + @test all(out_transformed * σ^2 .≈ dts_single.var_fₒ⁻¹(out_transformed)) + @test all(out_transformed * σ^2 .≈ dts_multi.var_fₒ⁻¹(out_transformed)) end @testset "UnitRangeTransform" begin + # 1D input + dts_single = UncertaintyQuantification.DataStandardizer( + df_single_in, single_input_names, output, + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.UnitRangeTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.UnitRangeTransform() + ) + ) + single_in_transformed = dts_single.fᵢ(df_single_in) + @test isa(single_in_transformed, Vector) + + tmin, tmax = extrema(df_single_in[:, only(single_input_names)]) + scale = 1 / (tmax - tmin) + manually_scaled = (df_single_in[:, only(single_input_names)] .- tmin) * scale + @test all(single_in_transformed .≈ manually_scaled) + + # ND input + dts_multi = UncertaintyQuantification.DataStandardizer( + df_multi_in, multi_input_names, output, + UncertaintyQuantification.InputTransform( + UncertaintyQuantification.UnitRangeTransform() + ), + UncertaintyQuantification.OutputTransform( + UncertaintyQuantification.UnitRangeTransform() + ) + ) + multi_in_transformed = dts_multi.fᵢ(df_multi_in) + + @test isa(multi_in_transformed, RowVecs) + + RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) + mins_maxs = extrema(df_single_in[:, only(single_input_names)], dims=1) + tmin = map(t -> t[1], mins_maxs[1, :]) + scale = map(t -> 1 / (t[2] - t[1]), mins_maxs[1, :]) + + manually_scaled = (Matrix(df_multi_in[:, multi_input_names]) .- tmin) .* scale + print(RowVecsMatrix) + print(manually_scaled) + @test all(RowVecsMatrix .≈ manually_scaled) + + # Output + out_transformed = dts_single.fₒ(df_single_in) + + # single input and multi input related output transforms should do the same thing + @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) + + # Mean and latent function samples get rescaled and shifted + @test all(df_single_in[!, output] .≈ dts_single.fₒ⁻¹(out_transformed)) + @test all(df_multi_in[!, output] .≈ dts_multi.fₒ⁻¹(out_transformed)) + + # Variance gets multiplied by squared scale used in UnitRangeTransform + # Note: This usually gets applied to the GP posterior variance, here + # we just scale out_transformed back to verify it does the right thing + tmin, tmax = extrema(df_single_in[:, output]) + scale = 1 / (tmax - tmin) + + print(out_transformed * (1/scale)^2) + print(dts_single.var_fₒ⁻¹(out_transformed)) + @test all(out_transformed * (1/scale)^2 .≈ dts_single.var_fₒ⁻¹(out_transformed)) + @test all(out_transformed * (1/scale)^2 .≈ dts_multi.var_fₒ⁻¹(out_transformed)) end @testset "StandardNormalTransform" begin diff --git a/test/runtests.jl b/test/runtests.jl index 00a02afe8..5363e7067 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,3 +1,4 @@ +using AbstractGPs using DataFrames using Distributed using HCubature 
@@ -9,59 +10,60 @@ using StatsBase: fit, Histogram, corkendall using Test using UncertaintyQuantification -include("inputs/empiricaldistribution.jl") -include("dynamics/psd.jl") -include("inputs/parameter.jl") -include("inputs/jointdistribution.jl") -include("inputs/imprecise/interval.jl") -include("inputs/imprecise/p-box.jl") -include("inputs/randomvariables/randomvariable.jl") -include("inputs/randomvariables/distributionparameters.jl") -include("inputs/jointdistribution.jl"); -include("inputs/inputs.jl") -include("inputs/copulas/gaussian.jl") -include("inputs/stochasticprocesses/spectralrepresentation.jl") -include("inputs/stochasticprocesses/models.jl") +# include("inputs/empiricaldistribution.jl") +# include("dynamics/psd.jl") +# include("inputs/parameter.jl") +# include("inputs/jointdistribution.jl") +# include("inputs/imprecise/interval.jl") +# include("inputs/imprecise/p-box.jl") +# include("inputs/randomvariables/randomvariable.jl") +# include("inputs/randomvariables/distributionparameters.jl") +# include("inputs/jointdistribution.jl"); +# include("inputs/inputs.jl") +# include("inputs/copulas/gaussian.jl") +# include("inputs/stochasticprocesses/spectralrepresentation.jl") +# include("inputs/stochasticprocesses/models.jl") -include("models/external/solvers.jl") -include("models/external/externalmodel.jl") -include("models/model.jl") -include("models/polyharmonicspline.jl") -include("models/pce/pcebases.jl") -include("models/pce/polynomialchaosexpansion.jl") -include("models/responsesurface.jl") -include("models/imprecise/propagation.jl") +# include("models/external/solvers.jl") +# include("models/external/externalmodel.jl") +include("models/gp/standardization.jl") +# include("models/model.jl") +# include("models/polyharmonicspline.jl") +# include("models/pce/pcebases.jl") +# include("models/pce/polynomialchaosexpansion.jl") +# include("models/responsesurface.jl") +# include("models/imprecise/propagation.jl") -include("modelupdating/bayesianupdating.jl") -include("modelupdating/bayesianMAP.jl") +# include("modelupdating/bayesianupdating.jl") +# include("modelupdating/bayesianMAP.jl") -include("reliability/form.jl") -include("reliability/probabilityoffailure.jl") -include("reliability/probabilityoffailure_imprecise.jl") +# include("reliability/form.jl") +# include("reliability/probabilityoffailure.jl") +# include("reliability/probabilityoffailure_imprecise.jl") -include("sensitivity/gradient.jl") -include("sensitivity/sobolindices.jl") +# include("sensitivity/gradient.jl") +# include("sensitivity/sobolindices.jl") -include("simulations/doe.jl") -include("simulations/montecarlo.jl") -include("simulations/subset.jl") +# include("simulations/doe.jl") +# include("simulations/montecarlo.jl") +# include("simulations/subset.jl") -include("util/fourier-transform.jl") +# include("util/fourier-transform.jl") -if Sys.islinux() - HPC = false - HPC_account = "HPC_account_1" - HPC_partition = "CPU_partition" - if "HPC" in ARGS - HPC = true - HPC_account = ARGS[2] - HPC_partition = ARGS[3] - @warn "Running a slurm test with HPC=ON, using account $HPC_account and partition $HPC_partition. Several (20) small 1-task calculations will be submitted to slurm for testing in different job array configuations." - end +# if Sys.islinux() +# HPC = false +# HPC_account = "HPC_account_1" +# HPC_partition = "CPU_partition" +# if "HPC" in ARGS +# HPC = true +# HPC_account = ARGS[2] +# HPC_partition = ARGS[3] +# @warn "Running a slurm test with HPC=ON, using account $HPC_account and partition $HPC_partition. 
Several (20) small 1-task calculations will be submitted to slurm for testing in different job array configuations." +# end - if HPC == false && !occursin("test/test_utilities", ENV["PATH"]) - @warn "For slurm test to pass on Linux, test_utilities/sbatch must be added to PATH" - @warn "sbatch command line tool may use the fake test_utilities/sbatch" - end - include("hpc/slurm.jl") -end +# if HPC == false && !occursin("test/test_utilities", ENV["PATH"]) +# @warn "For slurm test to pass on Linux, test_utilities/sbatch must be added to PATH" +# @warn "sbatch command line tool may use the fake test_utilities/sbatch" +# end +# include("hpc/slurm.jl") +# end From 55a0832b82109deb01eb860235800ee9d52005d6 Mon Sep 17 00:00:00 2001 From: Felix Mett <119941890+Cr0gan@users.noreply.github.com> Date: Wed, 3 Sep 2025 20:34:21 +0200 Subject: [PATCH 067/117] Add AbstractGPs to tests --- test/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Project.toml b/test/Project.toml index f36f601fe..f4c77da4c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +AbstractGPs = "99985d1d-32ba-4be9-9821-2ec096f28918" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" HCubature = "19dc6840-f33b-545b-b366-655c7e3ffd49" From 6196775148edaf7aa18dffce72e62a8e01e67263 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 4 Sep 2025 18:17:21 +0200 Subject: [PATCH 068/117] Add unit tests for gp data standardization --- test/models/gp/standardization.jl | 364 +++++++++++++++--------------- 1 file changed, 178 insertions(+), 186 deletions(-) diff --git a/test/models/gp/standardization.jl b/test/models/gp/standardization.jl index 75bb54993..de56bf714 100644 --- a/test/models/gp/standardization.jl +++ b/test/models/gp/standardization.jl @@ -1,194 +1,186 @@ +function make_standardizer( + dataframe, + input_names, + output, + it, + ot +) + UncertaintyQuantification.DataStandardizer( + dataframe, input_names, output, + UncertaintyQuantification.InputTransform(it), + UncertaintyQuantification.OutputTransform(ot), + ) +end + @testset "GaussianProcessDataStandardizer" begin - single_input = RandomVariable(Normal(-1, 0.5), :x1) - multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) + transforms = [ + UncertaintyQuantification.IdentityTransform(), + UncertaintyQuantification.ZScoreTransform(), + UncertaintyQuantification.UnitRangeTransform(), + UncertaintyQuantification.StandardNormalTransform() + ] N = 10 output = :y - df_single_in = sample(single_input, N) - df_single_in[!, output] = rand(N) - - df_multi_in = sample(multi_input, N) - df_multi_in[!, output] = df_single_in[!, output] - - single_input_names = propertynames(df_single_in[:, Not(output)]) - multi_input_names = propertynames(df_multi_in[:, Not(output)]) - - @testset "IdentityTransform" begin - # 1D input - dts_single = UncertaintyQuantification.DataStandardizer( - df_single_in, single_input_names, output, - UncertaintyQuantification.InputTransform( - UncertaintyQuantification.IdentityTransform() - ), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.IdentityTransform() - ) - ) - single_in_transformed = dts_single.fᵢ(df_single_in) - - @test isa(single_in_transformed, Vector) - @test all(single_in_transformed .== df_single_in[:, only(single_input_names)]) - - # ND input - dts_multi = UncertaintyQuantification.DataStandardizer( - df_multi_in, multi_input_names, output, - UncertaintyQuantification.InputTransform( - 
UncertaintyQuantification.IdentityTransform() - ), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.IdentityTransform() - ) - ) - multi_in_transformed = dts_multi.fᵢ(df_multi_in) - - @test isa(multi_in_transformed, RowVecs) - - RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) - @test all(RowVecsMatrix .== Matrix(df_multi_in[:, multi_input_names])) - - # Output - out_transformed = dts_single.fₒ(df_single_in) - - # single input and multi input related output transforms should do the same thing - @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) - - # Mean and latent function samples just get identity transformed - @test all(df_single_in[!, output] .== dts_single.fₒ⁻¹(out_transformed)) - @test all(df_multi_in[!, output] .== dts_multi.fₒ⁻¹(out_transformed)) - - # Variance also just get identity transformed - @test all(df_single_in[!, output] .== dts_single.var_fₒ⁻¹(out_transformed)) - @test all(df_multi_in[!, output] .== dts_multi.var_fₒ⁻¹(out_transformed)) - end - - @testset "ZScoreTransform" begin - # 1D input - dts_single = UncertaintyQuantification.DataStandardizer( - df_single_in, single_input_names, output, - UncertaintyQuantification.InputTransform( - UncertaintyQuantification.ZScoreTransform() - ), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.ZScoreTransform() - ) - ) - single_in_transformed = dts_single.fᵢ(df_single_in) - - @test isa(single_in_transformed, Vector) - - μ = mean(df_single_in[:, only(single_input_names)]) - σ = std(df_single_in[:, only(single_input_names)]) - manually_scaled = (df_single_in[:, only(single_input_names)] .- μ) ./ σ - @test all(single_in_transformed .≈ manually_scaled) - - # ND input - dts_multi = UncertaintyQuantification.DataStandardizer( - df_multi_in, multi_input_names, output, - UncertaintyQuantification.InputTransform( - UncertaintyQuantification.ZScoreTransform() - ), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.ZScoreTransform() - ) - ) - multi_in_transformed = dts_multi.fᵢ(df_multi_in) - - @test isa(multi_in_transformed, RowVecs) - - RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) - μ = mean(Matrix(df_multi_in[:, multi_input_names]), dims=1) - σ = std(Matrix(df_multi_in[:, multi_input_names]), dims=1) - - manually_scaled = (Matrix(df_multi_in[:, multi_input_names]) .- μ) ./ σ - @test all(RowVecsMatrix .≈ manually_scaled) - - # Output - out_transformed = dts_single.fₒ(df_single_in) - - # single input and multi input related output transforms should do the same thing - @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) - - # Mean and latent function samples get rescaled and shifted - @test all(df_single_in[!, output] .≈ dts_single.fₒ⁻¹(out_transformed)) - @test all(df_multi_in[!, output] .≈ dts_multi.fₒ⁻¹(out_transformed)) - - # Variance gets multiplied by squared standard deviation used in ZScoreTransform - # Note: This usually gets applied to the GP posterior variance, here - # we just scale out_transformed back to verify it does the right thing - σ = std(df_single_in[:, output]) - - @test all(out_transformed * σ^2 .≈ dts_single.var_fₒ⁻¹(out_transformed)) - @test all(out_transformed * σ^2 .≈ dts_multi.var_fₒ⁻¹(out_transformed)) - end - - @testset "UnitRangeTransform" begin - # 1D input - dts_single = UncertaintyQuantification.DataStandardizer( - df_single_in, single_input_names, output, - UncertaintyQuantification.InputTransform( - UncertaintyQuantification.UnitRangeTransform() - ), - 
UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.UnitRangeTransform() - ) - ) - single_in_transformed = dts_single.fᵢ(df_single_in) - - @test isa(single_in_transformed, Vector) - - tmin, tmax = extrema(df_single_in[:, only(single_input_names)]) - scale = 1 / (tmax - tmin) - manually_scaled = (df_single_in[:, only(single_input_names)] .- tmin) * scale - @test all(single_in_transformed .≈ manually_scaled) - - # ND input - dts_multi = UncertaintyQuantification.DataStandardizer( - df_multi_in, multi_input_names, output, - UncertaintyQuantification.InputTransform( - UncertaintyQuantification.UnitRangeTransform() - ), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.UnitRangeTransform() - ) - ) - multi_in_transformed = dts_multi.fᵢ(df_multi_in) - - @test isa(multi_in_transformed, RowVecs) - - RowVecsMatrix = mapreduce(rv -> rv', vcat, multi_in_transformed) - mins_maxs = extrema(df_single_in[:, only(single_input_names)], dims=1) - tmin = map(t -> t[1], mins_maxs[1, :]) - scale = map(t -> 1 / (t[2] - t[1]), mins_maxs[1, :]) - - manually_scaled = (Matrix(df_multi_in[:, multi_input_names]) .- tmin) .* scale - print(RowVecsMatrix) - print(manually_scaled) - @test all(RowVecsMatrix .≈ manually_scaled) - - # Output - out_transformed = dts_single.fₒ(df_single_in) - - # single input and multi input related output transforms should do the same thing - @test all(out_transformed .== dts_multi.fₒ(df_multi_in)) - - # Mean and latent function samples get rescaled and shifted - @test all(df_single_in[!, output] .≈ dts_single.fₒ⁻¹(out_transformed)) - @test all(df_multi_in[!, output] .≈ dts_multi.fₒ⁻¹(out_transformed)) - - # Variance gets multiplied by squared scale used in UnitRangeTransform - # Note: This usually gets applied to the GP posterior variance, here - # we just scale out_transformed back to verify it does the right thing - tmin, tmax = extrema(df_single_in[:, output]) - scale = 1 / (tmax - tmin) - - print(out_transformed * (1/scale)^2) - print(dts_single.var_fₒ⁻¹(out_transformed)) - @test all(out_transformed * (1/scale)^2 .≈ dts_single.var_fₒ⁻¹(out_transformed)) - @test all(out_transformed * (1/scale)^2 .≈ dts_multi.var_fₒ⁻¹(out_transformed)) - end + single_input = RandomVariable(Normal(-1, 0.5), :x1) + df_single = sample(single_input, N) + df_single[!, output] = rand(N) - @testset "StandardNormalTransform" begin - #: TODO + multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) + df_multi = sample(multi_input, N) + df_multi[!, output] = df_single[!, output] + + names_single = propertynames(df_single[:, Not(output)]) + names_multi = propertynames(df_multi[:, Not(output)]) + + for transform in transforms + @testset "$(nameof(typeof(transform)))" begin + for (testname, df, inputs, names) in [ + ("single input", df_single, single_input, names_single), + ("multi input", df_multi, multi_input, names_multi) + ] + @testset "$testname" begin + if isa(transform, UncertaintyQuantification.StandardNormalTransform) + @test_throws ArgumentError UncertaintyQuantification.DataStandardizer( + df, inputs, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform(transform) + ) + # Test output shapes for identity output transform to check StandardNormalTransform for inputs + dts = UncertaintyQuantification.DataStandardizer( + df, inputs, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform( + 
UncertaintyQuantification.IdentityTransform() + ) + ) + Xin = dts.fᵢ(df) + if testname == "single input" + # input gets transformed to a Vector + @test isa(Xin, Vector) + else + # input gets transformed to RowVecs + @test isa(Xin, RowVecs) + end + else + dts = UncertaintyQuantification.DataStandardizer( + df, inputs, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform(transform), + ) + + Xin = dts.fᵢ(df) + Xout = dts.fₒ(df) + Yout = dts.fₒ⁻¹(Xout) + var_Yout = dts.var_fₒ⁻¹(Xout) + if testname == "single input" + # input gets transformed to a Vector + @test isa(Xin, Vector) + if isa(transform, UncertaintyQuantification.IdentityTransform) + # Test input scaling + @test all(Xin .== df[!, only(names)]) + # Test output scaling + @test all(Yout .== Xout) + # Test output inverse transform + @test all(df[!, output] .== Yout) + # Test output inverse transform for variance + @test all(df[!, output] .== var_Yout) + + elseif isa(transform, UncertaintyQuantification.ZScoreTransform) + # Test input scaling + μ = mean(df[!, only(names)]) + σ = std(df[!, only(names)]) + Min = (df[!, only(names)] .- μ) ./ σ + @test all(Xin .≈ Min) + + # Test output scaling + μ = mean(df[!, output]) + σ = std(df[!, output]) + Mout = (df[!, output] .- μ) ./ σ + @test all(Mout .≈ Xout) + # Test output inverse transform + @test all(df[!, output] .≈ Yout) + # Test output inverse transform for variance + @test all(σ^2 * Xout .≈ var_Yout) + + elseif isa(transform, UncertaintyQuantification.UnitRangeTransform) + # Test input scaling + tmin, tmax = extrema(df[!, only(names)]) + shift = tmin + scale = 1 / (tmax - tmin) + Min = (df[!, only(names)] .- shift) * scale + @test all(Xin .≈ Min) + + # Test output scaling + tmin, tmax = extrema(df[!, output]) + shift = tmin + scale = 1 / (tmax - tmin) + Mout = (df[!, output] .- shift) * scale + @test all(Mout .≈ Xout) + # Test output inverse transform + @test all(df[!, output] .≈ Yout) + # Test output inverse transform for variance + @test all(scale^2 * Xout .≈ var_Yout) + + end + else + # input gets transformed to RowVecs + @test isa(Xin, RowVecs) + if isa(transform, UncertaintyQuantification.IdentityTransform) + # Test input scaling + Min = mapreduce(rv -> rv', vcat, Xin) + @test all(Min .== Matrix(df[!, names])) + # Test output scaling + @test all(Yout .== Xout) + # Test output inverse transform + @test all(df[!, output] .== Yout) + # Test output inverse transform for variance + @test all(df[!, output] .== var_Yout) + + elseif isa(transform, UncertaintyQuantification.ZScoreTransform) + # Test input scaling + Xin = mapreduce(rv -> rv', vcat, Xin) + μ = mean(Matrix(df[!, names]), dims=1) + σ = std(Matrix(df[!, names]), dims=1) + Min = (Matrix(df[!, names]) .- μ) ./ σ + @test all(Xin .≈ Min) + + # Test output scaling + μ = mean(df[!, output]) + σ = std(df[!, output]) + Mout = (df[!, output] .- μ) ./ σ + @test all(Mout .≈ Xout) + # Test output inverse transform + @test all(df[!, output] .≈ Yout) + # Test output inverse transform for variance + @test all(σ^2 * Xout .≈ var_Yout) + + elseif isa(transform, UncertaintyQuantification.UnitRangeTransform) + # Test input scaling + Xin = mapreduce(rv -> rv', vcat, Xin) + extrs = extrema(Matrix(df[!, names]), dims=1) + shift = map(t -> t[1], extrs[1, :]) + scale = map(t -> 1 / (t[2] - t[1]), extrs[1, :]) + Min = (Matrix(df[!, names]) .- shift') .* scale' + @test all(Xin .≈ Min) + + # Test output scaling + tmin, tmax = extrema(df[!, output]) + shift = tmin + scale = 1 / (tmax - tmin) + Mout = (df[!, 
output] .- shift) * scale + @test all(Mout .≈ Xout) + # Test output inverse transform + @test all(df[!, output] .≈ Yout) + # Test output inverse transform for variance + @test all(scale^2 * Xout .≈ var_Yout) + + end + end + end + end + end + end end end From f77e7b1080cdbf1b50ff442ca8e7f7fde90a68e8 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 4 Sep 2025 18:17:44 +0200 Subject: [PATCH 069/117] Add gaussian process regression reference --- docs/references.bib | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/references.bib b/docs/references.bib index 437c5433a..7378280cf 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -360,6 +360,19 @@ @book{raiffaAppliedStatisticalDecision1961 pagetotal = {356} } +@book{rasmussen2005gaussian, + title = {Gaussian {Processes} for {Machine} {Learning}}, + copyright = {http://creativecommons.org/licenses/by-nc-nd/4.0/}, + isbn = {978-0-262-25683-4}, + url = {https://direct.mit.edu/books/book/2320/Gaussian-Processes-for-Machine-Learning}, + language = {en}, + urldate = {2025-09-04}, + publisher = {The MIT Press}, + author = {Rasmussen, Carl Edward and Williams, Christopher K. I.}, + month = nov, + year = {2005}, + doi = {10.7551/mitpress/3206.001.0001}, +} @article{schmelzer2023random, title = {Random sets, copulas and related sets of probability measures}, From 8137f0bb4336bbb5002fe5464f0bffe52b58cd93 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 4 Sep 2025 18:18:14 +0200 Subject: [PATCH 070/117] Add theoretical background documentation for gaussian process regression --- docs/src/manual/metamodels.md | 63 ++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index 96ec27631..b69b8db80 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -8,4 +8,65 @@ The ones implemented here are `TwoLevelFactorial`, `FullFactorial`, `FractionalF ## Response Surface -A Response Surface is a simple polynomial surrogate model. It can be trained by providing it with evaluated points of a function or any of the aforementioned experimental designs. \ No newline at end of file +A Response Surface is a simple polynomial surrogate model. It can be trained by providing it with evaluated points of a function or any of the aforementioned experimental designs. + +## Gaussian Process Regression + +### Theoretical Background +A Gaussian Process (GP) is a collection of random variables, any finite subset of which has a joint Gaussian distribution. It is fully specified by a mean function ``m(x)`` and a covariance (kernel) function ``k(x, x')``. In GP regression, we aim to model an unknown function ``f(x)``. Before observing any data, we assume that the function ``f(x)`` is distributed according to a GP: + +```math +f(x) \sim \mathcal{G}\mathcal{P}\left( m(x), k(x, x') \right). +``` + +This prior GP specifies that any finite collection of function values follows a multivariate normal distribution. + +#### Posterior Gaussian Process +The posterior Gaussian Process represents the distribution of functions after incorporating observed data. We denote the observation data as: + +```math +\mathcal{D} = \lbrace (\hat{x}_i, \hat{f}_i) \mid i=1, \dots, N \rbrace, +``` + +where ``\hat{f}_i = f(\hat{x}_i)`` in the noise-free observation case, and ``\hat{f}_i = f(\hat{x}_i) + \varepsilon_i`` in the noisy case, with independent noise terms ``\varepsilon_i \sim \mathcal{N}(0, \sigma_\varepsilon^2)``. 
Let ``\hat{X} = [\hat{x}_1, \dots, \hat{x}_N]`` denote the collection of observation data locations. The corresponding mean vector and covariance matrix are:
+
+```math
+\mu(\hat{X}) = [m(\hat{x}_1), \dots, m(\hat{x}_N)], \quad K(\hat{X}, \hat{X}) \text{ with entries } K_{ij} = k(\hat{x}_i, \hat{x}_j).
+```
+
+For a new input location ``x^*`` we are interested in the unknown function value ``f^* = f(x^*)``. By the definition of a GP, the joint distribution of the observed outputs ``\hat{f}`` and the unknown ``f^*`` is multivariate Gaussian:
+
+```math
+\begin{bmatrix} \hat{f}\\ f^* \end{bmatrix} \sim \mathcal{N}\left( \begin{bmatrix} \mu(\hat{X}) \\ m(x^*) \end{bmatrix}, \begin{bmatrix} K(\hat{X}, \hat{X}) & K(\hat{X}, x^*)\\ K(x^*, \hat{X}) & K(x^*, x^*) \end{bmatrix} \right),
+```
+
+where:
+- ``K(\hat{X}, \hat{X})`` is the covariance matrix with entries ``K_{ij} = k(\hat{x}_i, \hat{x}_j)``,
+- ``K(\hat{X}, x^*)`` is the covariance vector with entries ``K_{i1} = k(\hat{x}_i, x^*)``,
+- and ``K(x^*, x^*)`` is the variance at the new input location.
+
+We can then obtain the posterior distribution of ``f^*`` from the properties of multivariate Gaussian distributions (see, e.g., Appendix A.2 in [rasmussen2005gaussian](@cite)) by conditioning the joint Gaussian on the observed outputs ``\hat{f}``:
+
+```math
+f^* \mid \hat{X}, \hat{f}, x^* \sim \mathcal{N}(\mu^*(x^*), \Sigma^*(x^*)),
+```
+
+with
+
+```math
+\mu^*(x^*) = m(x^*) + K(x^*, \hat{X})K(\hat{X}, \hat{X})^{-1}(\hat{f} - \mu(\hat{X})), \\
+\Sigma^*(x^*) = K(x^*, x^*) - K(x^*, \hat{X})K(\hat{X}, \hat{X})^{-1}K(\hat{X}, x^*).
+```
+
+In the noisy observation case, the covariance between training points is adjusted by adding the noise variance:
+
+```math
+K(\hat{X}, \hat{X}) \rightarrow K(\hat{X}, \hat{X}) + \sigma^2_{\varepsilon}I.
+```
+
+The computation of the posterior predictive distribution generalizes straightforwardly to multiple input locations, providing both the posterior mean, which can serve as a regression estimate of the unknown function, and the posterior variances, which quantify the uncertainty at each point. Because the posterior is multivariate Gaussian, one can also sample function realizations at specified locations to visualize possible functions consistent with the observed data.
+
+The GP prior, together with the observed data, thus defines a posterior distribution over functions that captures predictions at new inputs, including uncertainty. The noise-free and noisy cases differ only in the posterior covariance, which incorporates the observation noise when present.
+
+#### Hyperparameter optimization
+In practice, the mean function, the kernel and the observation noise depend on hyperparameters, e.g. length scales, signal variance and noise variance. A common strategy, and the one implemented here via `MaximumLikelihoodEstimation`, is to select these hyperparameters by maximizing the log marginal likelihood of the observed data [rasmussen2005gaussian](@cite).
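+
+As a quick illustration of these quantities, the following minimal sketch uses AbstractGPs.jl (the library this implementation builds on) directly; the training data, the noise variance and all variable names are made up for the example:
+
+```julia
+using AbstractGPs
+
+x = collect(range(0, 5; length=10))   # training locations x̂
+y = sin.(x)                           # observed outputs f̂
+σ² = 1e-5                             # assumed observation noise variance
+
+f = GP(SqExponentialKernel())         # zero-mean GP prior
+fx = f(x, σ²)                         # finite-dimensional projection at the training locations
+p_f = posterior(fx, y)                # condition on the observations
+
+x_new = [2.5]
+mean(p_f, x_new)                      # posterior mean μ*(x*)
+var(p_f, x_new)                       # posterior variance Σ*(x*)
+logpdf(fx, y)                         # log marginal likelihood used for hyperparameter tuning
+```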
+ +### Constructing A Gaussian Process Regression Model \ No newline at end of file From 109dc59bc33f678aeedb68f8aa78f825874b3349 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 15 Sep 2025 16:58:15 +0200 Subject: [PATCH 071/117] Add export NoHyperparameterOptimization --- src/UncertaintyQuantification.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index ebee55410..7097ad6a5 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -129,6 +129,7 @@ export MaximumLikelihoodBayesian export MaximumLikelihoodEstimation export Model export MonteCarlo +export NoHyperparameterOptimization export ParallelModel export Parameter export PlackettBurman From 6fc01d1a2b6797fc2e4e5934b28be324aa013e63 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 15 Sep 2025 16:58:40 +0200 Subject: [PATCH 072/117] Refactor NoHyperparameterOptimization --- src/models/gp/hyperparametertuning.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index 4b42f55e0..6b685b5e0 100644 --- a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -2,7 +2,7 @@ using DifferentiationInterface abstract type AbstractHyperparameterOptimization end -struct NoOptimization <: AbstractHyperparameterOptimization end +struct NoHyperparameterOptimization <: AbstractHyperparameterOptimization end struct MaximumLikelihoodEstimation <: AbstractHyperparameterOptimization optimizer::Optim.FirstOrderOptimizer @@ -18,7 +18,7 @@ function optimize_hyperparameters( gp::Union{AbstractGPs.GP, NoisyGP}, ::Union{RowVecs{<:Real}, Vector{<:Real}}, ::Vector{<:Real}, - ::NoOptimization + ::NoHyperparameterOptimization ) return gp end From 1c4bfb64ee701ee2dbde17f833088a5da0bdf242 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 15 Sep 2025 16:59:32 +0200 Subject: [PATCH 073/117] Add tests for GaussianProcess construction --- test/models/gp/gaussianprocess.jl | 185 ++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) diff --git a/test/models/gp/gaussianprocess.jl b/test/models/gp/gaussianprocess.jl index e69de29bb..8520ea3ad 100644 --- a/test/models/gp/gaussianprocess.jl +++ b/test/models/gp/gaussianprocess.jl @@ -0,0 +1,185 @@ +@testset "GaussianProcessRegression" begin + + @testset "OneDimensionalInput" begin + # --------------------------------------------------- + # Test construction from DataFrame + x = collect(range(0, stop=5, length=10)) + y = sin.(x) + data = DataFrame(:x => x, :y => y) + + σ² = 1e-5 + kernel = SqExponentialKernel() + + gp = GP(0.0, kernel) + gpr = GaussianProcess( + gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + test_data = DataFrame(:x => x) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + # outputs at trainingset should be very close + @test all(isapprox.(test_data[!, :y], y; atol=1e-14)) + # variance should be very close to zero as we did not use observation noise + @test all(isapprox.(test_data[!, :y_var], 0.0; atol=1e-14)) + + noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) + noisy_gpr = GaussianProcess( + noisy_gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + test_data = DataFrame(:x => x) + evaluate!(noisy_gpr, test_data) + mean_and_var!(noisy_gpr, test_data) + + # evaluate! 
returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + # check if prediction variance is within 5% deviation from prescribed noise + @test all(abs.(test_data[!, :y_var] .- σ²) .< 0.05σ²) + + # --------------------------------------------------- + # Test construction from UQInput + UQModel + x = RandomVariable(Uniform(0, 5), :x) + model = Model( + df -> sin.(df.x), :y + ) + design = LatinHypercubeSampling(10) + + σ² = 1e-5 + kernel = SqExponentialKernel() + + gp = GP(0.0, kernel) + gpr = GaussianProcess( + gp, + x, + model, + :y, + design; + optimization=NoHyperparameterOptimization() + ) + + test_data = sample(x, design) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + + noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) + noisy_gpr = GaussianProcess( + noisy_gp, + x, + model, + :y, + design; + optimization=NoHyperparameterOptimization() + ) + + test_data = sample(x, design) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + end + @testset "TwoDimensionalInput" begin + # --------------------------------------------------- + # Test construction from DataFrame + x = [collect(range(0, stop=5, length=10)), collect(range(0, stop=5, length=10))] + y = sin.(x[1]) + cos.(x[2]) + data = DataFrame(:x1 => x[1], :x2 => x[2], :y => y) + + σ² = 1e-5 + kernel = SqExponentialKernel() + + gp = GP(0.0, kernel) + gpr = GaussianProcess( + gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + test_data = DataFrame(:x1 => x[1], :x2 => x[2]) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + # outputs at trainingset should be very close + @test all(isapprox.(test_data[!, :y], y; atol=1e-14)) + # variance should be very close to zero as we did not use observation noise + @test all(isapprox.(test_data[!, :y_var], 0.0; atol=1e-14)) + + noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) + noisy_gpr = GaussianProcess( + noisy_gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + test_data = DataFrame(:x1 => x[1], :x2 => x[2]) + evaluate!(noisy_gpr, test_data) + mean_and_var!(noisy_gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + # check if prediction variance is within 5% deviation from prescribed noise + @test all(abs.(test_data[!, :y_var] .- σ²) .< 0.05σ²) + + # --------------------------------------------------- + # Test construction from UQInput + UQModel + x = RandomVariable.([Uniform(0, 5), Uniform(0, 5)], [:x1, :x2]) + model = Model( + df -> sin.(df.x1) + cos.(df.x2), :y + ) + design = LatinHypercubeSampling(10) + + σ² = 1e-5 + kernel = SqExponentialKernel() + + gp = GP(0.0, kernel) + gpr = GaussianProcess( + gp, + x, + model, + :y, + design; + optimization=NoHyperparameterOptimization() + ) + + test_data = sample(x, design) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! 
returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + + noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) + noisy_gpr = GaussianProcess( + noisy_gp, + x, + model, + :y, + design; + optimization=NoHyperparameterOptimization() + ) + + test_data = sample(x, design) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + + # evaluate! returns mean as standard + @test all(test_data[!, :y] .== test_data[!, :y_mean]) + end +end \ No newline at end of file From 531b25a437f369d6e9922a9449f5c5a063e70f84 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Mon, 15 Sep 2025 16:59:53 +0200 Subject: [PATCH 074/117] Preliminary test files --- test/models/gp/hyperparametertuning.jl | 0 test/runtests.jl | 1 + 2 files changed, 1 insertion(+) create mode 100644 test/models/gp/hyperparametertuning.jl diff --git a/test/models/gp/hyperparametertuning.jl b/test/models/gp/hyperparametertuning.jl new file mode 100644 index 000000000..e69de29bb diff --git a/test/runtests.jl b/test/runtests.jl index 5363e7067..0d80e6519 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,6 +26,7 @@ using UncertaintyQuantification # include("models/external/solvers.jl") # include("models/external/externalmodel.jl") +include("models/gp/gaussianprocess.jl") include("models/gp/standardization.jl") # include("models/model.jl") # include("models/polyharmonicspline.jl") From 6bc6ead253a88b92b4444980ae5dfc21f9fed2c7 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 18 Sep 2025 20:25:20 +0200 Subject: [PATCH 075/117] Preliminary idea to test parameter extraction implementation --- test/models/gp/parameterization.jl | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 test/models/gp/parameterization.jl diff --git a/test/models/gp/parameterization.jl b/test/models/gp/parameterization.jl new file mode 100644 index 000000000..16420837f --- /dev/null +++ b/test/models/gp/parameterization.jl @@ -0,0 +1,32 @@ +exported_names = names(KernelFunctions; all=false) + +function is_kernel_type(sym) + obj = getfield(KernelFunctions, sym) # get type or value + if obj isa DataType + return obj <: Kernel + elseif obj isa UnionAll + return obj.body <: Kernel + else + return false + end +end + +# Filter the symbols +kernel_symbols = filter(is_kernel_type, exported_names) + +# Map to actual type objects +kernel_types = [getfield(KernelFunctions, sym) for sym in kernel_symbols] + +function is_transform_type(sym) + obj = getfield(KernelFunctions, sym) + if obj isa DataType + return obj <: Transform + elseif obj isa UnionAll + return obj.body <: Transform + else + return false + end +end + +transform_symbols = filter(is_transform_type, exported_names) +transform_types = getfield.(Ref(KernelFunctions), transform_symbols) \ No newline at end of file From 9f264ae7700703227ca5a35adf676a4de982a080 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 13:53:48 +0200 Subject: [PATCH 076/117] Preliminary commit --- test/models/gp/parameterization.jl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/models/gp/parameterization.jl b/test/models/gp/parameterization.jl index 16420837f..f7505f3a1 100644 --- a/test/models/gp/parameterization.jl +++ b/test/models/gp/parameterization.jl @@ -29,4 +29,13 @@ function is_transform_type(sym) end transform_symbols = filter(is_transform_type, exported_names) -transform_types = getfield.(Ref(KernelFunctions), transform_symbols) \ No newline at end of file +transform_types = getfield.(Ref(KernelFunctions), transform_symbols) + 
+function check_implementation(t::Vector{Type}) + extract = map(tᵢ -> hasmethod(UncertaintyQuantification.extract_parameters, Tuple{tᵢ}), t) + return extract +end + +transform_types[.!check_implementation(transform_types)] + +kernel_types[.!check_implementation(kernel_types)] \ No newline at end of file From d0880a9dada8b6731e066eb4b938aec8ba60d183 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 16:32:22 +0200 Subject: [PATCH 077/117] Add extract and apply method for unsupported kernels --- src/models/gp/parameterization.jl | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index d5162d8c7..5ea1c75ca 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -42,6 +42,11 @@ apply_parameters(m::ZeroMean, θ) = m extract_parameters(m::ConstMean) = m.c apply_parameters(::ConstMean, θ) = ConstMean(θ) +# TODO: CustomMean uses a user defined function that could depend on parameters +# We should support custom definitions for applying and extracting parameters +extract_parameters(::CustomMean) = nothing +apply_parameters(m::CustomMean, θ) = m + # ---------------- Kernel functions ---------------- # Kernels and transforms without parameters @@ -54,10 +59,10 @@ BaseKernelsWithoutParameters = Union{ } # TODO: GibbsKernel has a lengthscale function which could depend on trainable parameters -KernelsWithoutParameters = Union{GibbsKernel} +KernelsWithoutParameters = GibbsKernel # TODO: FunctionTransform has a transformation function which could depend on trainable parameters -TransformsWithoutParameters = Union{FunctionTransform, SelectTransform, IdentityTransform} +TransformsWithoutParameters = Union{FunctionTransform, SelectTransform, KernelFunctions.IdentityTransform} AllWithoutParameters = Union{ BaseKernelsWithoutParameters, @@ -65,10 +70,21 @@ AllWithoutParameters = Union{ TransformsWithoutParameters } +# TODO: Add support for multi-output models (MOKernel) and general neural networks as kernels (NeuralKernelNetwork) +UnsupportedKernels = Union{ + IndependentMOKernel, IntrinsicCoregionMOKernel, + LatentFactorMOKernel, LinearMixingModelKernel, + KernelFunctions.NeuralKernelNetwork +} + # no parameters extract_parameters(::T) where {T<:AllWithoutParameters} = nothing apply_parameters(k::T, θ) where {T<:AllWithoutParameters} = k +# currently unsupported +extract_parameters(::T) where {T<:UnsupportedKernels} = throw(ArgumentError("`extract_parameters` is not supported for kernel type $(T).")) +apply_parameters(k::T, θ) where {T<:UnsupportedKernels} = throw(ArgumentError("`apply_parameters` is not supported for kernel type $(T).")) + # basekernels (see KernelFunctions.jl src/basekernels) extract_parameters(k::ConstantKernel) = ParameterHandling.positive(k.c) apply_parameters(::ConstantKernel, θ) = ConstantKernel(; c=only(θ)) @@ -104,7 +120,6 @@ extract_parameters(k::GammaRationalKernel) = ( apply_parameters(::GammaRationalKernel, θ) = GammaRationalKernel(; α=only(θ[1]), γ=only(θ[2])) # kernels (see KernelFunctions.jl src/kernels) -# TODO: NeuralKernelNetwork not implemented extract_parameters(k::KernelProduct) = map(extract_parameters, k.kernels) apply_parameters(k::KernelProduct, θ) = KernelProduct(map(apply_parameters, k.kernels, θ)) From 0871a3cdb3caf848c4461c11cb823703c073bb5d Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 16:32:51 +0200 Subject: [PATCH 078/117] Add test to check improvement after hyperparameter optimization --- 
test/models/gp/hyperparametertuning.jl | 59 ++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/models/gp/hyperparametertuning.jl b/test/models/gp/hyperparametertuning.jl index e69de29bb..d49aa88e1 100644 --- a/test/models/gp/hyperparametertuning.jl +++ b/test/models/gp/hyperparametertuning.jl @@ -0,0 +1,59 @@ +@testset "GaussianProcessRegressionHyperparameterTuning" begin + + @testset "OneDimensionalInput" begin + x = collect(range(0, stop=5, length=10)) + y = sin.(x) + data = DataFrame(:x => x, :y => y) + + σ² = 1e-5 + kernel = SqExponentialKernel() ∘ ScaleTransform(10.0) + + gp = with_gaussian_noise(GP(0.0, kernel), σ²) + gpr_no_opt = GaussianProcess( + gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + gpr_opt = GaussianProcess( + gp, + data, + :y; + optimization=MaximumLikelihoodEstimation() + ) + + likelihood_no_opt = logpdf(gpr_no_opt.gp(x), y) + likelihood_opt = logpdf(gpr_opt.gp(x), y) + + @test likelihood_opt > likelihood_no_opt + end + @testset "TwoDimensionalInput" begin + x = [collect(range(0, stop=5, length=10)) collect(range(0, stop=5, length=10))] + y = sin.(x[:, 1]) + cos.(x[:, 2]) + data = DataFrame(:x1 => x[:, 1], :x2 => x[:, 2], :y => y) + + σ² = 1e-5 + kernel = SqExponentialKernel() ∘ ARDTransform([5.0, 5.0]) + + gp = with_gaussian_noise(GP(0.0, kernel), σ²) + gpr_no_opt = GaussianProcess( + gp, + data, + :y; + optimization=NoHyperparameterOptimization() + ) + + gpr_opt = GaussianProcess( + gp, + data, + :y; + optimization=MaximumLikelihoodEstimation() + ) + + likelihood_no_opt = logpdf(gpr_no_opt.gp(RowVecs(x)), y) + likelihood_opt = logpdf(gpr_opt.gp(RowVecs(x)), y) + + @test likelihood_opt > likelihood_no_opt + end +end \ No newline at end of file From 9bf53e5c3b4479d6320eb979cdb1f09e26eb79d0 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 16:33:22 +0200 Subject: [PATCH 079/117] Add test to check implementation for all kernels, transforms and meanfunctions --- test/models/gp/parameterization.jl | 66 +++++++++++++++++------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/test/models/gp/parameterization.jl b/test/models/gp/parameterization.jl index f7505f3a1..019c16eb3 100644 --- a/test/models/gp/parameterization.jl +++ b/test/models/gp/parameterization.jl @@ -1,41 +1,51 @@ -exported_names = names(KernelFunctions; all=false) - -function is_kernel_type(sym) - obj = getfield(KernelFunctions, sym) # get type or value +function is_of_type( + exporting_module::Module, + name::Symbol, + type::DataType +) + obj = getfield(exporting_module, name) if obj isa DataType - return obj <: Kernel + return obj <: type elseif obj isa UnionAll - return obj.body <: Kernel + return obj.body <: type else return false end end -# Filter the symbols -kernel_symbols = filter(is_kernel_type, exported_names) +function get_exported_types( + exporting_module::Module, + type::DataType +) + exported_names = names(exporting_module; all=false) + type_symbols = filter(n -> is_of_type(exporting_module, n, type), exported_names) + types = map(sym -> getfield(exporting_module, sym), type_symbols) + return filter(t -> !isabstracttype(t), types) +end -# Map to actual type objects -kernel_types = [getfield(KernelFunctions, sym) for sym in kernel_symbols] +check_extract_parameters(type::Type) = hasmethod(UncertaintyQuantification.extract_parameters, Tuple{type}) +check_apply_parameters(type::Type) = hasmethod(UncertaintyQuantification.apply_parameters, Tuple{type, Any}) +check_implementation(type::Type) = 
check_extract_parameters(type) && check_apply_parameters(type) -function is_transform_type(sym) - obj = getfield(KernelFunctions, sym) - if obj isa DataType - return obj <: Transform - elseif obj isa UnionAll - return obj.body <: Transform - else - return false - end -end +@testset "GaussianProcessParameterHandling" begin + transforms = get_exported_types(KernelFunctions, Transform) + unimplemented_transforms = filter(!check_implementation, transforms) -transform_symbols = filter(is_transform_type, exported_names) -transform_types = getfield.(Ref(KernelFunctions), transform_symbols) + kernels = get_exported_types(KernelFunctions, Kernel) + unimplemented_kernels = filter(!check_implementation, kernels) -function check_implementation(t::Vector{Type}) - extract = map(tᵢ -> hasmethod(UncertaintyQuantification.extract_parameters, Tuple{tᵢ}), t) - return extract -end + meanfunctions = get_exported_types(AbstractGPs, AbstractGPs.MeanFunction) + unimplemented_meanfunctions = filter(!check_implementation, meanfunctions) -transform_types[.!check_implementation(transform_types)] + @testset "KernelFunctions.Transform" begin + @test isempty(unimplemented_transforms) || @error "Transform parameter handling not implemented for:\n "* join(string.(unimplemented_transforms), "\n ") + end -kernel_types[.!check_implementation(kernel_types)] \ No newline at end of file + @testset "KernelFunctions.Kernel" begin + @test isempty(unimplemented_kernels) || @error "Kernel parameter handling not implemented for:\n "* join(string.(unimplemented_kernels), "\n ") + end + + @testset "AbstractGPs.MeanFunction" begin + @test isempty(unimplemented_meanfunctions) || @error "Meanfunction parameter handling not implemented for:\n "* join(string.(unimplemented_meanfunctions), "\n ") + end +end From 42db9daf1e4a989ef99f044b89cb89176e89ffd7 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 20:30:47 +0200 Subject: [PATCH 080/117] Refactor data standardization tests --- test/models/gp/standardization.jl | 380 +++++++++++++++++------------- 1 file changed, 222 insertions(+), 158 deletions(-) diff --git a/test/models/gp/standardization.jl b/test/models/gp/standardization.jl index de56bf714..5b642bf4e 100644 --- a/test/models/gp/standardization.jl +++ b/test/models/gp/standardization.jl @@ -1,17 +1,152 @@ function make_standardizer( - dataframe, - input_names, - output, - it, - ot + data::DataFrame, + input::Union{Symbol, Vector{Symbol}}, + output::Symbol, + transform::UncertaintyQuantification.AbstractDataTransform, ) UncertaintyQuantification.DataStandardizer( - dataframe, input_names, output, - UncertaintyQuantification.InputTransform(it), - UncertaintyQuantification.OutputTransform(ot), + data, input, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform(transform), ) end +function check_transform( + data::DataFrame, + input::Union{Symbol, Vector{Symbol}}, + output::Symbol, + transformed_vars::Tuple, + ::UncertaintyQuantification.IdentityTransform +) + tranformed_in, transformed_out, + inv_transformed_out, inv_transformed_out_var = transformed_vars + + if isa(input, Symbol) # 1D case + # Test input scaling + @test all(tranformed_in .== data[!, input]) + # Transformation should not do anything here + @test all(transformed_out .== inv_transformed_out) + # Test output inverse transform + @test all(data[!, output] .== inv_transformed_out) + # Test output inverse transform for variance (should not do anything to variance) + @test all(data[!, output] .== 
inv_transformed_out_var) + else # 2D case + # Test input scaling + tranformed_in = mapreduce(rv -> rv', vcat, tranformed_in) + @test all(tranformed_in .== Matrix(data[!, input])) + # Test output scaling + @test all(transformed_out .== inv_transformed_out) + # Test output inverse transform + @test all(data[!, output] .== inv_transformed_out) + # Test output inverse transform for variance + @test all(data[!, output] .== inv_transformed_out_var) + end +end + +function check_transform( + data::DataFrame, + input::Union{Symbol, Vector{Symbol}}, + output::Symbol, + transformed_vars::Tuple, + ::UncertaintyQuantification.ZScoreTransform +) + tranformed_in, transformed_out, + inv_transformed_out, inv_transformed_out_var = transformed_vars + + if isa(input, Symbol) # 1D case + # Test input scaling + μ = mean(data[!, input]) + σ = std(data[!, input]) + manually_transformed_in = (data[!, input] .- μ) ./ σ + @test all(tranformed_in .≈ manually_transformed_in) + + # Test output scaling + μ = mean(data[!, output]) + σ = std(data[!, output]) + manually_transformed_out = (data[!, output] .- μ) ./ σ + @test all(manually_transformed_out .≈ transformed_out) + + # Test output inverse transform + @test all(data[!, output] .≈ inv_transformed_out) + + # Test output inverse transform for variance + @test all(σ^2 * transformed_out .≈ inv_transformed_out_var) + else # 2D case + # Test input scaling + tranformed_in = mapreduce(rv -> rv', vcat, tranformed_in) + μ = mean(Matrix(data[!, input]), dims=1) + σ = std(Matrix(data[!, input]), dims=1) + manually_transformed_in = (Matrix(data[!, input]) .- μ) ./ σ + @test all(tranformed_in .≈ manually_transformed_in) + + # Test output scaling + μ = mean(data[!, output]) + σ = std(data[!, output]) + manually_transformed_out = (data[!, output] .- μ) ./ σ + @test all(manually_transformed_out .≈ transformed_out) + + # Test output inverse transform + @test all(data[!, output] .≈ inv_transformed_out) + + # Test output inverse transform for variance + @test all(σ^2 * transformed_out .≈ inv_transformed_out_var) + end +end + +function check_transform( + data::DataFrame, + input::Union{Symbol, Vector{Symbol}}, + output::Symbol, + transformed_vars::Tuple, + ::UncertaintyQuantification.UnitRangeTransform +) + tranformed_in, transformed_out, + inv_transformed_out, inv_transformed_out_var = transformed_vars + + if isa(input, Symbol) # 1D case + # Test input scaling + tmin, tmax = extrema(data[!, input]) + shift = tmin + scale = 1 / (tmax - tmin) + manually_transformed_in = (data[!, input] .- shift) * scale + @test all(tranformed_in .≈ manually_transformed_in) + + # Test output scaling + tmin, tmax = extrema(data[!, output]) + shift = tmin + scale = 1 / (tmax - tmin) + manually_transformed_out = (data[!, output] .- shift) * scale + @test all(manually_transformed_out .≈ transformed_out) + + # Test output inverse transform + @test all(data[!, output] .≈ inv_transformed_out) + + # Test output inverse transform for variance + @test all(scale^2 * transformed_out .≈ inv_transformed_out_var) + else # 2D case + # Test input scaling + tranformed_in = mapreduce(rv -> rv', vcat, tranformed_in) + extrs = extrema(Matrix(data[!, input]), dims=1) + shift = map(t -> t[1], extrs[1, :]) + scale = map(t -> 1 / (t[2] - t[1]), extrs[1, :]) + manually_transformed_in = (Matrix(data[!, input]) .- shift') .* scale' + @test all(tranformed_in .≈ manually_transformed_in) + + # Test output scaling + tmin, tmax = extrema(data[!, output]) + shift = tmin + scale = 1 / (tmax - tmin) + manually_transformed_out = (data[!, 
output] .- shift) * scale + @test all(manually_transformed_out .≈ transformed_out) + + # Test output inverse transform + @test all(data[!, output] .≈ inv_transformed_out) + + # Test output inverse transform for variance + @test all(scale^2 * transformed_out .≈ inv_transformed_out_var) + end +end + @testset "GaussianProcessDataStandardizer" begin transforms = [ UncertaintyQuantification.IdentityTransform(), @@ -23,163 +158,92 @@ end N = 10 output = :y - single_input = RandomVariable(Normal(-1, 0.5), :x1) - df_single = sample(single_input, N) - df_single[!, output] = rand(N) - - multi_input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) - df_multi = sample(multi_input, N) - df_multi[!, output] = df_single[!, output] - - names_single = propertynames(df_single[:, Not(output)]) - names_multi = propertynames(df_multi[:, Not(output)]) - - for transform in transforms - @testset "$(nameof(typeof(transform)))" begin - for (testname, df, inputs, names) in [ - ("single input", df_single, single_input, names_single), - ("multi input", df_multi, multi_input, names_multi) - ] - @testset "$testname" begin - if isa(transform, UncertaintyQuantification.StandardNormalTransform) - @test_throws ArgumentError UncertaintyQuantification.DataStandardizer( - df, inputs, output, + @testset "OneDimensionalInput" begin + input = RandomVariable(Normal(-1, 0.5), :x1) + df = sample(input, N) + df[!, output] = rand(N) + names = propertynames(df[:, Not(output)]) + + for transform in transforms + @testset "$(nameof(typeof(transform)))" begin + # StandardNormalTransform should not work for Outputs! + if isa(transform, UncertaintyQuantification.StandardNormalTransform) + @test_throws ArgumentError datastandardizer = UncertaintyQuantification.DataStandardizer( + df, input, output, UncertaintyQuantification.InputTransform(transform), - UncertaintyQuantification.OutputTransform(transform) - ) - # Test output shapes for identity output transform to check StandardNormalTransform for inputs - dts = UncertaintyQuantification.DataStandardizer( - df, inputs, output, - UncertaintyQuantification.InputTransform(transform), - UncertaintyQuantification.OutputTransform( - UncertaintyQuantification.IdentityTransform() - ) + UncertaintyQuantification.OutputTransform(transform), ) - Xin = dts.fᵢ(df) - if testname == "single input" - # input gets transformed to a Vector - @test isa(Xin, Vector) - else - # input gets transformed to RowVecs - @test isa(Xin, RowVecs) - end - else - dts = UncertaintyQuantification.DataStandardizer( - df, inputs, output, + + datastandardizer = UncertaintyQuantification.DataStandardizer( + df, input, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform(UncertaintyQuantification.IdentityTransform()), + ) + + tranformed_in = datastandardizer.fᵢ(df) + # input gets transformed to a Vector + @test isa(tranformed_in, Vector) + # TODO: Should test if input does get transformed to standard normal space, even though this relies on already tested internal implementation. 
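+                    # A possible follow-up for the TODO above (sketch only, assuming the package's
+                    # `to_standard_normal_space!` helper): transform a copy of the samples and
+                    # compare against the standardizer output, e.g.
+                    #   ref = copy(df); to_standard_normal_space!(input, ref)
+                    #   @test all(tranformed_in .≈ ref[!, only(names)])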
+ continue + end + + # Test all other transforms + datastandardizer = make_standardizer(df, names, output, transform) + tranformed_in = datastandardizer.fᵢ(df) + transformed_out = datastandardizer.fₒ(df) + inv_transformed_out = datastandardizer.fₒ⁻¹(transformed_out) + inv_transformed_out_var = datastandardizer.var_fₒ⁻¹(transformed_out) + + check_transform( + df, only(names), output, + (tranformed_in, transformed_out, inv_transformed_out, inv_transformed_out_var), + transform + ) + end + end + end + + @testset "MultiDimensionalInput" begin + input = RandomVariable.([Uniform(-2, 0), Normal(-1, 0.5), Uniform(0, 1)], [:x1, :x2, :x3]) + df = sample(input, N) + df[!, output] = rand(N) + names = propertynames(df[:, Not(output)]) + + for transform in transforms + @testset "$(nameof(typeof(transform)))" begin + # StandardNormalTransform should not work for Outputs! + if isa(transform, UncertaintyQuantification.StandardNormalTransform) + @test_throws ArgumentError datastandardizer = UncertaintyQuantification.DataStandardizer( + df, input, output, UncertaintyQuantification.InputTransform(transform), UncertaintyQuantification.OutputTransform(transform), ) - Xin = dts.fᵢ(df) - Xout = dts.fₒ(df) - Yout = dts.fₒ⁻¹(Xout) - var_Yout = dts.var_fₒ⁻¹(Xout) - if testname == "single input" - # input gets transformed to a Vector - @test isa(Xin, Vector) - if isa(transform, UncertaintyQuantification.IdentityTransform) - # Test input scaling - @test all(Xin .== df[!, only(names)]) - # Test output scaling - @test all(Yout .== Xout) - # Test output inverse transform - @test all(df[!, output] .== Yout) - # Test output inverse transform for variance - @test all(df[!, output] .== var_Yout) - - elseif isa(transform, UncertaintyQuantification.ZScoreTransform) - # Test input scaling - μ = mean(df[!, only(names)]) - σ = std(df[!, only(names)]) - Min = (df[!, only(names)] .- μ) ./ σ - @test all(Xin .≈ Min) - - # Test output scaling - μ = mean(df[!, output]) - σ = std(df[!, output]) - Mout = (df[!, output] .- μ) ./ σ - @test all(Mout .≈ Xout) - # Test output inverse transform - @test all(df[!, output] .≈ Yout) - # Test output inverse transform for variance - @test all(σ^2 * Xout .≈ var_Yout) - - elseif isa(transform, UncertaintyQuantification.UnitRangeTransform) - # Test input scaling - tmin, tmax = extrema(df[!, only(names)]) - shift = tmin - scale = 1 / (tmax - tmin) - Min = (df[!, only(names)] .- shift) * scale - @test all(Xin .≈ Min) - - # Test output scaling - tmin, tmax = extrema(df[!, output]) - shift = tmin - scale = 1 / (tmax - tmin) - Mout = (df[!, output] .- shift) * scale - @test all(Mout .≈ Xout) - # Test output inverse transform - @test all(df[!, output] .≈ Yout) - # Test output inverse transform for variance - @test all(scale^2 * Xout .≈ var_Yout) - - end - else - # input gets transformed to RowVecs - @test isa(Xin, RowVecs) - if isa(transform, UncertaintyQuantification.IdentityTransform) - # Test input scaling - Min = mapreduce(rv -> rv', vcat, Xin) - @test all(Min .== Matrix(df[!, names])) - # Test output scaling - @test all(Yout .== Xout) - # Test output inverse transform - @test all(df[!, output] .== Yout) - # Test output inverse transform for variance - @test all(df[!, output] .== var_Yout) - - elseif isa(transform, UncertaintyQuantification.ZScoreTransform) - # Test input scaling - Xin = mapreduce(rv -> rv', vcat, Xin) - μ = mean(Matrix(df[!, names]), dims=1) - σ = std(Matrix(df[!, names]), dims=1) - Min = (Matrix(df[!, names]) .- μ) ./ σ - @test all(Xin .≈ Min) - - # Test output scaling - μ = mean(df[!, 
output]) - σ = std(df[!, output]) - Mout = (df[!, output] .- μ) ./ σ - @test all(Mout .≈ Xout) - # Test output inverse transform - @test all(df[!, output] .≈ Yout) - # Test output inverse transform for variance - @test all(σ^2 * Xout .≈ var_Yout) - - elseif isa(transform, UncertaintyQuantification.UnitRangeTransform) - # Test input scaling - Xin = mapreduce(rv -> rv', vcat, Xin) - extrs = extrema(Matrix(df[!, names]), dims=1) - shift = map(t -> t[1], extrs[1, :]) - scale = map(t -> 1 / (t[2] - t[1]), extrs[1, :]) - Min = (Matrix(df[!, names]) .- shift') .* scale' - @test all(Xin .≈ Min) - - # Test output scaling - tmin, tmax = extrema(df[!, output]) - shift = tmin - scale = 1 / (tmax - tmin) - Mout = (df[!, output] .- shift) * scale - @test all(Mout .≈ Xout) - # Test output inverse transform - @test all(df[!, output] .≈ Yout) - # Test output inverse transform for variance - @test all(scale^2 * Xout .≈ var_Yout) - - end - end - end + datastandardizer = UncertaintyQuantification.DataStandardizer( + df, input, output, + UncertaintyQuantification.InputTransform(transform), + UncertaintyQuantification.OutputTransform(UncertaintyQuantification.IdentityTransform()), + ) + + tranformed_in = datastandardizer.fᵢ(df) + # input gets transformed to RowVecs + @test isa(tranformed_in, RowVecs) + # TODO: Should test if input does get transformed to standard normal space, even though this relies on already tested internal implementation. + continue end + + # Test all other transforms + datastandardizer = make_standardizer(df, names, output, transform) + tranformed_in = datastandardizer.fᵢ(df) + transformed_out = datastandardizer.fₒ(df) + inv_transformed_out = datastandardizer.fₒ⁻¹(transformed_out) + inv_transformed_out_var = datastandardizer.var_fₒ⁻¹(transformed_out) + + check_transform( + df, names, output, + (tranformed_in, transformed_out, inv_transformed_out, inv_transformed_out_var), + transform + ) end end end From 1e71f292dc2ba31c7869841dd2666e5f615b8382 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 20:32:00 +0200 Subject: [PATCH 081/117] Add complete test set for gaussian process regression constructors --- test/models/gp/gaussianprocess.jl | 261 ++++++++++++++++-------------- 1 file changed, 138 insertions(+), 123 deletions(-) diff --git a/test/models/gp/gaussianprocess.jl b/test/models/gp/gaussianprocess.jl index 8520ea3ad..57079cf30 100644 --- a/test/models/gp/gaussianprocess.jl +++ b/test/models/gp/gaussianprocess.jl @@ -1,185 +1,200 @@ +function build_and_evaluate_gp( + data::DataFrame, + input::Union{Symbol, Vector{Symbol}}, + output::Symbol, + gp::Union{AbstractGPs.GP, UncertaintyQuantification.NoisyGP}; + input_transform::UncertaintyQuantification.AbstractDataTransform=IdentityTransform(), + output_transform::UncertaintyQuantification.AbstractDataTransform=IdentityTransform() +) + gpr = GaussianProcess( + gp, + data, + output; + input_transform=input_transform, + output_transform=output_transform, + optimization=NoHyperparameterOptimization() + ) + + + test_data = select(data, input) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + return test_data +end + +function build_and_evaluate_gp( + input::Union{UQInput, Vector{<:UQInput}}, + model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + gp::Union{AbstractGPs.GP, UncertaintyQuantification.NoisyGP}; + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}=LatinHypercubeSampling(10), + 
input_transform::UncertaintyQuantification.AbstractDataTransform=UncertaintyQuantification.IdentityTransform(), + output_transform::UncertaintyQuantification.AbstractDataTransform=UncertaintyQuantification.IdentityTransform() +) + Random.seed!(1337) + gpr = GaussianProcess( + gp, + input, + model, + output, + experimentaldesign; + input_transform=input_transform, + output_transform=output_transform, + optimization=NoHyperparameterOptimization() + ) + + Random.seed!(42) + test_data = sample(input, experimentaldesign) + evaluate!(gpr, test_data) + mean_and_var!(gpr, test_data) + return test_data +end + @testset "GaussianProcessRegression" begin + # Input samples + n_input_samples = 10 + design = LatinHypercubeSampling(n_input_samples) + + # Use same base gp for every test + σ² = 1e-5 + base_gp = GP(0.0, SqExponentialKernel()) + base_gp_noisy = with_gaussian_noise(GP(0.0, SqExponentialKernel()), σ²) @testset "OneDimensionalInput" begin - # --------------------------------------------------- - # Test construction from DataFrame - x = collect(range(0, stop=5, length=10)) + # DataFrame input + x= collect(range(0, stop=5, length=n_input_samples)) y = sin.(x) data = DataFrame(:x => x, :y => y) - σ² = 1e-5 - kernel = SqExponentialKernel() + # UQInput + xrv = RandomVariable(Uniform(0, 5), :x) + model = Model( + df -> sin.(df.x), :y + ) - gp = GP(0.0, kernel) - gpr = GaussianProcess( - gp, + # Test construction from DataFrame + test_data = build_and_evaluate_gp( data, - :y; - optimization=NoHyperparameterOptimization() + :x, + :y, + base_gp; + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - - test_data = DataFrame(:x => x) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) # evaluate! returns mean as standard @test all(test_data[!, :y] .== test_data[!, :y_mean]) # outputs at trainingset should be very close - @test all(isapprox.(test_data[!, :y], y; atol=1e-14)) + @test all(isapprox.(test_data[!, :y], y; atol=100*eps(Float64))) # variance should be very close to zero as we did not use observation noise - @test all(isapprox.(test_data[!, :y_var], 0.0; atol=1e-14)) + @test all(isapprox.(test_data[!, :y_var], 0.0; atol=100*eps(Float64))) - noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) - noisy_gpr = GaussianProcess( - noisy_gp, + test_data_noisy = build_and_evaluate_gp( data, - :y; - optimization=NoHyperparameterOptimization() + :x, + :y, + base_gp_noisy; + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - test_data = DataFrame(:x => x) - evaluate!(noisy_gpr, test_data) - mean_and_var!(noisy_gpr, test_data) - - # evaluate! 
returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) # check if prediction variance is within 5% deviation from prescribed noise - @test all(abs.(test_data[!, :y_var] .- σ²) .< 0.05σ²) + @test all(abs.(test_data_noisy[!, :y_var] .- σ²) .< 0.05σ²) - # --------------------------------------------------- # Test construction from UQInput + UQModel - x = RandomVariable(Uniform(0, 5), :x) - model = Model( - df -> sin.(df.x), :y - ) - design = LatinHypercubeSampling(10) - - σ² = 1e-5 - kernel = SqExponentialKernel() - - gp = GP(0.0, kernel) - gpr = GaussianProcess( - gp, - x, + test_data_uqinput = build_and_evaluate_gp( + xrv, model, :y, - design; - optimization=NoHyperparameterOptimization() + base_gp; + experimentaldesign=design, + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - - test_data = sample(x, design) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) + @test all(test_data_uqinput[!, :y] .== test_data_uqinput[!, :y_mean]) - noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) - noisy_gpr = GaussianProcess( - noisy_gp, - x, + test_data_uqinput_noisy = build_and_evaluate_gp( + xrv, model, :y, - design; - optimization=NoHyperparameterOptimization() + base_gp_noisy; + experimentaldesign=design, + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - test_data = sample(x, design) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) - # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) + @test all(test_data_uqinput_noisy[!, :y] .== test_data_uqinput_noisy[!, :y_mean]) end - @testset "TwoDimensionalInput" begin - # --------------------------------------------------- - # Test construction from DataFrame - x = [collect(range(0, stop=5, length=10)), collect(range(0, stop=5, length=10))] - y = sin.(x[1]) + cos.(x[2]) - data = DataFrame(:x1 => x[1], :x2 => x[2], :y => y) - σ² = 1e-5 - kernel = SqExponentialKernel() + @testset "MultiDimensionalInput" begin + # DataFrame input + x = [collect(range(0, stop=5, length=n_input_samples)) collect(range(0, stop=5, length=n_input_samples))] + y = sin.(x[:, 1]) + cos.(x[:, 2]) + data = DataFrame(:x1 => x[:, 1], :x2 => x[:, 2], :y => y) - gp = GP(0.0, kernel) - gpr = GaussianProcess( - gp, + # UQInput + xrv = RandomVariable.([Uniform(0, 5), Uniform(0, 5)], [:x1, :x2]) + model = Model( + df -> sin.(df.x1) + cos.(df.x2), :y + ) + + # Test construction from DataFrame + test_data = build_and_evaluate_gp( data, - :y; - optimization=NoHyperparameterOptimization() + [:x1, :x2], + :y, + base_gp; + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - - test_data = DataFrame(:x1 => x[1], :x2 => x[2]) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) # evaluate! 
returns mean as standard @test all(test_data[!, :y] .== test_data[!, :y_mean]) # outputs at trainingset should be very close - @test all(isapprox.(test_data[!, :y], y; atol=1e-14)) + @test all(isapprox.(test_data[!, :y], y; atol=100*eps(Float64))) # variance should be very close to zero as we did not use observation noise - @test all(isapprox.(test_data[!, :y_var], 0.0; atol=1e-14)) + @test all(isapprox.(test_data[!, :y_var], 0.0; atol=100*eps(Float64))) - noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) - noisy_gpr = GaussianProcess( - noisy_gp, + test_data_noisy = build_and_evaluate_gp( data, - :y; - optimization=NoHyperparameterOptimization() + [:x1, :x2], + :y, + base_gp_noisy; + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - test_data = DataFrame(:x1 => x[1], :x2 => x[2]) - evaluate!(noisy_gpr, test_data) - mean_and_var!(noisy_gpr, test_data) - - # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) # check if prediction variance is within 5% deviation from prescribed noise - @test all(abs.(test_data[!, :y_var] .- σ²) .< 0.05σ²) + @test all(abs.(test_data_noisy[!, :y_var] .- σ²) .< 0.05σ²) - # --------------------------------------------------- # Test construction from UQInput + UQModel - x = RandomVariable.([Uniform(0, 5), Uniform(0, 5)], [:x1, :x2]) - model = Model( - df -> sin.(df.x1) + cos.(df.x2), :y - ) - design = LatinHypercubeSampling(10) - - σ² = 1e-5 - kernel = SqExponentialKernel() - - gp = GP(0.0, kernel) - gpr = GaussianProcess( - gp, - x, + test_data_uqinput = build_and_evaluate_gp( + xrv, model, :y, - design; - optimization=NoHyperparameterOptimization() + base_gp; + experimentaldesign=design, + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - - test_data = sample(x, design) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) + @test all(test_data_uqinput[!, :y] .== test_data_uqinput[!, :y_mean]) - noisy_gp = with_gaussian_noise(GP(0.0, kernel), σ²) - noisy_gpr = GaussianProcess( - noisy_gp, - x, + test_data_uqinput_noisy = build_and_evaluate_gp( + xrv, model, :y, - design; - optimization=NoHyperparameterOptimization() + base_gp_noisy; + experimentaldesign=design, + input_transform=UncertaintyQuantification.IdentityTransform(), + output_transform=UncertaintyQuantification.IdentityTransform() ) - test_data = sample(x, design) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) - # evaluate! 
returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) + @test all(test_data_uqinput_noisy[!, :y] .== test_data_uqinput_noisy[!, :y_mean]) end end \ No newline at end of file From 21117c198389c15ecdba1b660847f0233615238e Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 20:32:39 +0200 Subject: [PATCH 082/117] Refactor gaussian process hyperparameter optimization tests --- test/models/gp/hyperparametertuning.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/models/gp/hyperparametertuning.jl b/test/models/gp/hyperparametertuning.jl index d49aa88e1..88b0aba0e 100644 --- a/test/models/gp/hyperparametertuning.jl +++ b/test/models/gp/hyperparametertuning.jl @@ -28,6 +28,7 @@ @test likelihood_opt > likelihood_no_opt end + @testset "TwoDimensionalInput" begin x = [collect(range(0, stop=5, length=10)) collect(range(0, stop=5, length=10))] y = sin.(x[:, 1]) + cos.(x[:, 2]) From e4ab211d95d0f7724f17c848eb39cff281b54d49 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 20:32:53 +0200 Subject: [PATCH 083/117] Preliminary example --- demo/metamodels/gaussianprocess.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl index b658ec5a0..15f6a5b70 100644 --- a/demo/metamodels/gaussianprocess.jl +++ b/demo/metamodels/gaussianprocess.jl @@ -22,7 +22,7 @@ optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; # optimizer = MaximumLikelihoodEstimation(Optim.LBFGS(), Optim.Options(; iterations=10, show_trace=false)) gpr = GaussianProcess( - gp, x, himmelblau, :y, design; input_transform=ZScoreTransform(), output_transform=StandardNormalTransform(), optimization=optimizer + gp, x, himmelblau, :y, design; input_transform=ZScoreTransform(), output_transform=ZScoreTransform(), optimization=optimizer ) test_data = sample(x, 1000) From c892a4393dbc8236b0a1812e9ab5f4ed9d36fbe1 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 9 Oct 2025 20:33:12 +0200 Subject: [PATCH 084/117] Add documentation for exported structs --- src/models/gp/hyperparametertuning.jl | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index 6b685b5e0..d9a039516 100644 --- a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -2,8 +2,42 @@ using DifferentiationInterface abstract type AbstractHyperparameterOptimization end +""" + NoHyperparameterOptimization() + +Creates a configuration that disables hyperparameter optimization for a Gaussian process model. + +# Examples + +```jldoctest +julia> NoHyperparameterOptimization() +NoHyperparameterOptimization() +``` +""" struct NoHyperparameterOptimization <: AbstractHyperparameterOptimization end +""" + MaximumLikelihoodEstimation(optimizer::Optim.FirstOrderOptimizer, options::Optim.Options) + +Represents a hyperparameter optimization strategy that maximizes the log marginal likelihood +of a Gaussian process model. + +# Constructors +* `MaximumLikelihoodEstimation()` (default: optimizer = Optim.LBFGS(), options = Optim.Options(; iterations=10, show_trace=false)) +* `MaximumLikelihoodEstimation(optimizer::Optim.FirstOrderOptimizer, options::Optim.Options)` + +# Note +You can choose from any gradient-based optimizer and set of options provided by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/), +such as `LBFGS()`, `Adam()`, or `ConjugateGradient()`. 
+ +# Examples + +```jldoctest +julia> MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; iterations=1000, show_trace=false)) +MaximumLikelihoodEstimation(Adam{Float64, Float64, Flat}(0.01, 0.9, 0.999, 1.0e-8, Flat()), Optim.Options(x_abstol = 0.0, x_reltol = 0.0, f_abstol = 0.0, f_reltol = 0.0, g_abstol = 1.0e-8, outer_x_abstol = 0.0, outer_x_reltol = 0.0, outer_f_abstol = 0.0, outer_f_reltol = 0.0, outer_g_abstol = 1.0e-8, f_calls_limit = 0, g_calls_limit = 0, h_calls_limit = 0, allow_f_increases = true, allow_outer_f_increases = true, successive_f_tol = 1, iterations = 1000, outer_iterations = 1000, store_trace = false, trace_simplex = false, show_trace = false, extended_trace = false, show_warnings = true, show_every = 1, time_limit = NaN, ) +) +``` +""" struct MaximumLikelihoodEstimation <: AbstractHyperparameterOptimization optimizer::Optim.FirstOrderOptimizer options::Optim.Options From 5c252928ab4aa5e017c7ae4b8a1fe040ae65b9fd Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 10 Oct 2025 15:46:23 +0200 Subject: [PATCH 085/117] Add documentation --- src/models/gp/parameterization.jl | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index 5ea1c75ca..df38c70ff 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -165,20 +165,38 @@ apply_parameters(f::GP, θ) = GP( ) """ - NoisyGP + NoisyGP(gp::GP, σ²::Real) -A wrapper around `GP` that adds Gaussian observation noise `obs_noise`. +Wraps a Gaussian process `gp` and adds learnable Gaussian observation noise with zero mean and variance `σ²` to the diagonal of its finite-dimensional covariance matrix. """ struct NoisyGP{T<:GP,Tn<:Real} gp::T - obs_noise::Tn + σ²::Tn end -(gp::NoisyGP)(x) = gp.gp(x, gp.obs_noise) -with_gaussian_noise(gp::GP, obs_noise::Real) = NoisyGP(gp, obs_noise) +(gp::NoisyGP)(x) = gp.gp(x, gp.σ²) + +""" + with_gaussian_noise(gp::AbstractGPs.GP, σ²::Real) + +Wraps a Gaussian process `gp` with additive Gaussian observation noise of variance `σ²`. + +This creates a [`NoisyGP`](@ref) object, which adds `σ²` to the diagonal of the covariance +matrix when evaluating the finite-dimensional projection of `gp`. + +# Examples +```jldoctest +julia> using AbstractGPs + +julia> gp = GP(SqExponentialKernel()); + +julia> noisy_gp = with_gaussian_noise(gp, 0.1); +``` +""" +with_gaussian_noise(gp::GP, σ²::Real) = NoisyGP(gp, σ²) extract_parameters(f::NoisyGP) = ( extract_parameters(f.gp), - ParameterHandling.positive(f.obs_noise, exp, 1e-6) + ParameterHandling.positive(f.σ², exp, 1e-6) ) apply_parameters(f::NoisyGP, θ) = NoisyGP(apply_parameters(f.gp, θ[1]), θ[2]) \ No newline at end of file From 6369de157ba7737b5b5ed868618f3248907890f7 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 10 Oct 2025 16:03:18 +0200 Subject: [PATCH 086/117] Add developer note --- src/models/gp/parameterization.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index df38c70ff..cbbb73a11 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -1,27 +1,29 @@ """ -Parameterized objects: a uniform interface for trainable models. +# Developer Note -`Parameterized(obj)` wraps an object so it can be called with a parameter vector `θ`: +`Parameterized(object)` wraps `object` so it can be called with parameters `θ`. 
+ +`parameterize(object)` returns a parameterized, callable version of the object and its parameters. ```julia model, θ = parameterize(obj) model(θ) # returns a new object with parameters applied This works for mean functions, kernels, transformations, and Gaussian processes. -The system relies on two core functions: +Based on two core functions, this system can extract model parameters for an optimization routine +and apply potentially constrained parameters to the underlying model to compute the optimization objective. +The two core functions are: 1. extract_parameters(obj) Returns the free parameters of obj wrapped in ParameterHandling containers. Enforces constraints (e.g., positive or bounded) where applicable. - For composite objects (like KernelSum or GP), returns a tuple or vector of parameter sets. + For composite objects (like a `AbstractGPs.GP`), returns a tuple of componentwise parameter sets. Returns nothing for objects without trainable parameters. 2. apply_parameters(obj, θ) Returns a new object of the same type with parameters θ applied. For hierarchical objects, θ is expected to match the structure returned by extract_parameters. - -This interface enables generic optimization routines to work across all supported types. """ struct Parameterized{T} @@ -35,7 +37,6 @@ end parameterize(object) = Parameterized(object), extract_parameters(object) # ---------------- Mean functions ---------------- - extract_parameters(::ZeroMean) = nothing apply_parameters(m::ZeroMean, θ) = m @@ -48,7 +49,6 @@ extract_parameters(::CustomMean) = nothing apply_parameters(m::CustomMean, θ) = m # ---------------- Kernel functions ---------------- - # Kernels and transforms without parameters BaseKernelsWithoutParameters = Union{ ZeroKernel, WhiteKernel, CosineKernel, @@ -157,7 +157,6 @@ extract_parameters(t::ScaleTransform) = ParameterHandling.positive(t.s) apply_parameters(::ScaleTransform, θ) = ScaleTransform(θ) # ---------------- Gaussian Processes ---------------- - extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) apply_parameters(f::GP, θ) = GP( apply_parameters(f.mean, θ[1]), From 5feb38c20deecabc3b09260fa050c2f16f34720b Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 10 Oct 2025 17:11:07 +0200 Subject: [PATCH 087/117] Add documentation and developer note --- src/models/gp/standardization.jl | 127 ++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 12 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index fd74ba907..c5f9d5002 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -1,20 +1,68 @@ +abstract type AbstractDataTransform end + +# ---------------- Input/Output transforms ---------------- """ -Input/output transformations for datasets. + IdentityTransform() -- `AbstractInputTransform` / `AbstractOutputTransform`: base types for input and output preprocessing. -- `DataTransforms`: holds the chosen input and output transformations. -- `build_datatransform(data, input/output, transform)`: returns functions that apply (and, for outputs, invert) the transformations to a `DataFrame`. +A standardization transform that applies an identity transform to data. -Predefined transforms include: - - `NoInputTransform` / `NoOutputTransform`: no change. - - `ZScoreInputTransform` / `ZScoreOutputTransform`: standardize to zero mean, unit variance. -""" -abstract type AbstractDataTransform end +Used as an input or output transformation in a [`GaussianProcess`](@ref). 
+Internally, the `DataStandardizer` constructs the functions required for evaluation.
+
+# Examples
+```jldoctest
+julia> id = UncertaintyQuantification.IdentityTransform()
UncertaintyQuantification.IdentityTransform()
+```
+"""
 struct IdentityTransform <: AbstractDataTransform end
+
+"""
+    ZScoreTransform()
+
+A standardization transform that rescales data to zero mean and unit variance.
+
+Used as an input or output transformation in a [`GaussianProcess`](@ref).
+Internally, the `DataStandardizer` constructs the functions required for evaluation.
+
+# Examples
+```jldoctest
+julia> zscore = UncertaintyQuantification.ZScoreTransform()
UncertaintyQuantification.ZScoreTransform()
+```
+"""
 struct ZScoreTransform <: AbstractDataTransform end
+
+"""
+    UnitRangeTransform()
+
+A standardization transform that rescales data to the [0, 1] range.
+
+Used as an input or output transformation in a [`GaussianProcess`](@ref).
+Internally, the `DataStandardizer` constructs the functions required for evaluation.
+
+# Examples
+```jldoctest
+julia> unitrange = UncertaintyQuantification.UnitRangeTransform()
UncertaintyQuantification.UnitRangeTransform()
+```
+"""
 struct UnitRangeTransform <: AbstractDataTransform end
+
+"""
+    StandardNormalTransform()
+
+A normalization transform that transforms data to the standard normal space.
+
+Can only be used as an input transformation in a [`GaussianProcess`](@ref) for inputs of type [`UQInput`](@ref).
+Internally, the `DataStandardizer` constructs the function required for evaluation.
+
+# Examples
+```jldoctest
+julia> sns = StandardNormalTransform()
StandardNormalTransform()
+```
+"""
 struct StandardNormalTransform <: AbstractDataTransform end

 struct InputTransform{T <: AbstractDataTransform} end
 struct OutputTransform{T <: AbstractDataTransform} end

 InputTransform(::Type{T}) where {T <: AbstractDataTransform} = InputTransform{T}()
 InputTransform(x::AbstractDataTransform) = InputTransform(typeof(x))

 OutputTransform(::Type{T}) where {T <: AbstractDataTransform} = OutputTransform{T}()
 OutputTransform(x::AbstractDataTransform) = OutputTransform(typeof(x))

 # ---------------- Struct for bundled transform functions ----------------
+"""
+# Developer Note
+
+    DataStandardizer(fᵢ::Function, fₒ::Function, fₒ⁻¹::Function, var_fₒ⁻¹::Function)
+
+Bundles input and output transformation functions for Gaussian process models.
+
+# Fields
+
+- `fᵢ` - function applied to input data.
+- `fₒ` - function applied to output data.
+- `fₒ⁻¹` - inverse function for the output transformation.
+- `var_fₒ⁻¹` - function for transforming output variances.
+
+!!! note "Inverse output transformations"
+
+Gaussian process regression requires two distinct inverse transformations for the output:
+one for the mean predictions (this same transformation can also be applied to function samples) and one for the variance predictions.
+
+Consider a z-score transformation of output ``y``:
+ ```math
+ \tilde{y} = \frac{y - μ}{σ}.
+ ```
+To recover the mean of the untransformed output, we can simply apply the inverse transformation:
+ ```math
+ E[y] = E[σ\tilde{y} + μ] = σE[\tilde{y}] + μ.
+ ```
+Analogously, sampled functions ``\tilde{y}_s`` from the Gaussian process regression model can be transformed back:
+ ```math
+ y_s = σ\tilde{y}_s + μ.
+ ```
+The variance, however, must be transformed back as follows:
+ ```math
+ Var[y] = E[(σ\tilde{y} + μ - E[σ\tilde{y} + μ])^2] = E[σ^2(\tilde{y} - E[\tilde{y}])^2] = σ^2 Var[\tilde{y}].
+ ```
+Hence, `fₒ⁻¹` and `var_fₒ⁻¹` must be implemented separately.
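+
+For illustration, in the z-score case above the two output functions could be built like this (a minimal sketch, not the actual implementation, assuming `μ` and `σ` were estimated from the training outputs):
+```julia
+fₒ⁻¹ = ỹ -> σ .* ỹ .+ μ      # back-transform predictive means (and sampled functions)
+var_fₒ⁻¹ = ṽ -> σ^2 .* ṽ     # back-transform predictive variances
+```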
+ +# Constructor + + DataStandardizer( + data::DataFrame, + input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, + output::Symbol, + input_transform::InputTransform, + output_transform::OutputTransform + ) + +Constructs a set of transformation functions from the provided data and user-specified input/output transforms. +Internally, it uses `build_datatransform` to create the actual functions. + +# Purpose + +This struct allows [`GaussianProcess`](@ref) models to consistently apply input and output transformations + (like `ZScoreTransform` or `IdentityTransform`) while keeping the API simple for end-users. +The `AbstractDataTransform` structs signal the desired behavior, and `DataStandardizer` converts them into callable functions for internal use. +""" struct DataStandardizer fᵢ::Function fₒ::Function @@ -33,7 +137,6 @@ struct DataStandardizer var_fₒ⁻¹::Function end -# ---------------- Constructor ---------------- function DataStandardizer( data::DataFrame, input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, @@ -48,7 +151,7 @@ end # ---------------- Transform builders ---------------- """ -build_datatransform(data, input/output, transform) + build_datatransform(data, input/output, transform) Returns a function (or pair of functions for outputs) that applies the specified transformation to a dataframe. """ From 76bdc2c1ad6836769b46cdb18c08e930ab729e63 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Fri, 10 Oct 2025 17:16:46 +0200 Subject: [PATCH 088/117] Update DifferentiationInterface --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 69ce21ed4..07c70ac45 100644 --- a/Project.toml +++ b/Project.toml @@ -40,7 +40,7 @@ CovarianceEstimation = "0.2" DataFrames = "0.22, 1.0" DelimitedFiles = "1" Dierckx = "0.5" -DifferentiationInterface = "0.7.4" +DifferentiationInterface = "0.7.7" Distributions = "0.24, 0.25" FastGaussQuadrature = "0.4, 0.5, 1" FiniteDifferences = "0.12" From a40bbd30bd95cc6abe9256ded55609d1da59abd7 Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 15:46:09 +0200 Subject: [PATCH 089/117] Refactor gaussian process evaluate! --- src/UncertaintyQuantification.jl | 4 +- src/models/gp/gaussianprocess.jl | 76 ++++++++++++++++++++++--------- test/models/gp/gaussianprocess.jl | 53 +++++++++++---------- 3 files changed, 84 insertions(+), 49 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index 7097ad6a5..ed3230383 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -164,7 +164,7 @@ export evaluate! export gradient export gradient_in_standard_normal_space export mean -export mean_and_var! +# export mean_and_var! export multivariate_indices export periodogram export polynomialchaos @@ -180,7 +180,7 @@ export to_copula_space export to_physical_space! export to_standard_normal_space export to_standard_normal_space! -export var! +# export var! 
export with_gaussian_noise include("inputs/empiricaldistribution.jl") diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 1be9b82c9..64d10fc0c 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -12,7 +12,7 @@ function GaussianProcess( output::Symbol; input_transform::AbstractDataTransform=IdentityTransform(), output_transform::AbstractDataTransform=IdentityTransform(), - optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() + optimization::AbstractHyperparameterOptimization=NoHyperparameterOptimization() ) input = propertynames(data[:, Not(output)]) # Is this always the case? @@ -47,12 +47,14 @@ function GaussianProcess( experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}; input_transform::AbstractDataTransform=IdentityTransform(), output_transform::AbstractDataTransform=IdentityTransform(), - optimization::AbstractHyperparameterOptimization=MaximumLikelihoodEstimation() + optimization::AbstractHyperparameterOptimization=NoHyperparameterOptimization() ) # build DataFrame data = sample(input, experimentaldesign) evaluate!(model, data) + # TODO: Deterministic input will break the GP kernel + # build in- and output transforms dts = DataStandardizer( data, input, output, @@ -77,31 +79,61 @@ function GaussianProcess( end # what should this calculate? Calculates only mean for now -function evaluate!(gp::GaussianProcess, data::DataFrame) +function evaluate!( + gp::GaussianProcess, + data::DataFrame; + mode::Symbol = :mean, + n_samples::Int = 1 +) x = gp.standardizer.fᵢ(data) - μ = mean(gp.gp(x)) + finite_projection = gp.gp(x) + + if mode === :mean + μ = mean(finite_projection) + col = Symbol(string(gp.output, "_mean")) + data[!, gp.output] = gp.standardizer.fₒ⁻¹(μ) + elseif mode === :var + σ² = var(finite_projection) + col = Symbol(string(gp.output, "_var")) + data[!, col] = gp.standardizer.var_fₒ⁻¹(σ²) + elseif mode === :mean_and_var + μ = mean(finite_projection) + σ² = var(finite_projection) + col_mean = Symbol(string(gp.output, "_mean")) + col_var = Symbol(string(gp.output, "_var")) + data[!, col_mean] = gp.standardizer.fₒ⁻¹(μ) + data[!, col_var] = gp.standardizer.var_fₒ⁻¹(σ²) + elseif mode === :sample + samples = rand(finite_projection, n_samples) + cols = [Symbol(string(gp.output, "_sample_", i)) for i in 1:n_samples] + foreach( + (colᵢ, sampleᵢ) -> data[!, colᵢ] = gp.standardizer.fₒ⁻¹(sampleᵢ), + cols, eachcol(samples) + ) + else + throw(ArgumentError("Unknown `GaussianProcess` evaluation mode: $mode")) + end - data[!, gp.output] = gp.standardizer.fₒ⁻¹(μ) return nothing end -function var!(gp::GaussianProcess, data::DataFrame) - x = gp.standardizer.fᵢ(data) - σ² = var(gp.gp(x)) +# function var!(gp::GaussianProcess, data::DataFrame) +# x = gp.standardizer.fᵢ(data) +# σ² = var(gp.gp(x)) - column_name = Symbol(string(gp.output, "_", "var")) - data[!, column_name] = gp.standardizer.var_fₒ⁻¹(σ²) - return nothing -end +# column_name = Symbol(string(gp.output, "_", "var")) +# data[!, column_name] = gp.standardizer.var_fₒ⁻¹(σ²) +# return nothing +# end -function mean_and_var!(gp::GaussianProcess, data::DataFrame) - x = gp.standardizer.fᵢ(data) - μ = mean(gp.gp(x)) - σ² = var(gp.gp(x)) +# function mean_and_var!(gp::GaussianProcess, data::DataFrame) +# x = gp.standardizer.fᵢ(data) +# μ = mean(gp.gp(x)) +# σ² = var(gp.gp(x)) - column_name_mean = Symbol(string(gp.output, "_", "mean")) - column_name_var = Symbol(string(gp.output, "_", "var")) - data[!, column_name_mean] = 
gp.standardizer.fₒ⁻¹(μ) - data[!, column_name_var] = gp.standardizer.var_fₒ⁻¹(σ²) - return nothing -end \ No newline at end of file +# column_name_mean = Symbol(string(gp.output, "_", "mean")) +# column_name_var = Symbol(string(gp.output, "_", "var")) +# data[!, column_name_mean] = gp.standardizer.fₒ⁻¹(μ) +# data[!, column_name_var] = gp.standardizer.var_fₒ⁻¹(σ²) +# return nothing +# end \ No newline at end of file diff --git a/test/models/gp/gaussianprocess.jl b/test/models/gp/gaussianprocess.jl index 57079cf30..0eeeb6f0d 100644 --- a/test/models/gp/gaussianprocess.jl +++ b/test/models/gp/gaussianprocess.jl @@ -17,8 +17,15 @@ function build_and_evaluate_gp( test_data = select(data, input) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) + evaluate!(gpr, test_data, mode=:mean_and_var) + + # Try sampling the gp if observation noise is present (numerical stability) + # Sampling relies on AbstractGPs and therefore is not tested further + if isa(gp, UncertaintyQuantification.NoisyGP) + evaluate!(gpr, test_data, mode=:sample) + evaluate!(gpr, test_data, mode=:sample, n_samples=2) + end + return test_data end @@ -31,7 +38,7 @@ function build_and_evaluate_gp( input_transform::UncertaintyQuantification.AbstractDataTransform=UncertaintyQuantification.IdentityTransform(), output_transform::UncertaintyQuantification.AbstractDataTransform=UncertaintyQuantification.IdentityTransform() ) - Random.seed!(1337) + Random.seed!(42) gpr = GaussianProcess( gp, input, @@ -43,10 +50,16 @@ function build_and_evaluate_gp( optimization=NoHyperparameterOptimization() ) - Random.seed!(42) test_data = sample(input, experimentaldesign) - evaluate!(gpr, test_data) - mean_and_var!(gpr, test_data) + evaluate!(gpr, test_data, mode=:mean_and_var) + + # Try sampling the gp if observation noise is present (numerical stability) + # Sampling relies on AbstractGPs and therefore is not tested further + if isa(gp, UncertaintyQuantification.NoisyGP) + evaluate!(gpr, test_data, mode=:sample) + evaluate!(gpr, test_data, mode=:sample, n_samples=2) + end + return test_data end @@ -82,10 +95,8 @@ end output_transform=UncertaintyQuantification.IdentityTransform() ) - # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) - # outputs at trainingset should be very close - @test all(isapprox.(test_data[!, :y], y; atol=100*eps(Float64))) + # mean outputs at trainingset should be very close + @test all(isapprox.(test_data[!, :y_mean], y; atol=100*eps(Float64))) # variance should be very close to zero as we did not use observation noise @test all(isapprox.(test_data[!, :y_var], 0.0; atol=100*eps(Float64))) @@ -102,6 +113,9 @@ end @test all(abs.(test_data_noisy[!, :y_var] .- σ²) .< 0.05σ²) # Test construction from UQInput + UQModel + # Note: Here we essentially just test if it runs. + # Consistency checks should apply as internally the UQInput + UQModel are used to construct a DataFrame + # and then the tested method above is called. test_data_uqinput = build_and_evaluate_gp( xrv, model, @@ -111,9 +125,6 @@ end input_transform=UncertaintyQuantification.IdentityTransform(), output_transform=UncertaintyQuantification.IdentityTransform() ) - - # evaluate! returns mean as standard - @test all(test_data_uqinput[!, :y] .== test_data_uqinput[!, :y_mean]) test_data_uqinput_noisy = build_and_evaluate_gp( xrv, @@ -124,9 +135,6 @@ end input_transform=UncertaintyQuantification.IdentityTransform(), output_transform=UncertaintyQuantification.IdentityTransform() ) - - # evaluate! 
returns mean as standard - @test all(test_data_uqinput_noisy[!, :y] .== test_data_uqinput_noisy[!, :y_mean]) end @testset "MultiDimensionalInput" begin @@ -151,10 +159,8 @@ end output_transform=UncertaintyQuantification.IdentityTransform() ) - # evaluate! returns mean as standard - @test all(test_data[!, :y] .== test_data[!, :y_mean]) # outputs at trainingset should be very close - @test all(isapprox.(test_data[!, :y], y; atol=100*eps(Float64))) + @test all(isapprox.(test_data[!, :y_mean], y; atol=100*eps(Float64))) # variance should be very close to zero as we did not use observation noise @test all(isapprox.(test_data[!, :y_var], 0.0; atol=100*eps(Float64))) @@ -171,6 +177,9 @@ end @test all(abs.(test_data_noisy[!, :y_var] .- σ²) .< 0.05σ²) # Test construction from UQInput + UQModel + # Note: Here we essentially just test if it runs. + # Consistency checks should apply as internally the UQInput + UQModel are used to construct a DataFrame + # and then the tested method above is called. test_data_uqinput = build_and_evaluate_gp( xrv, model, @@ -180,9 +189,6 @@ end input_transform=UncertaintyQuantification.IdentityTransform(), output_transform=UncertaintyQuantification.IdentityTransform() ) - - # evaluate! returns mean as standard - @test all(test_data_uqinput[!, :y] .== test_data_uqinput[!, :y_mean]) test_data_uqinput_noisy = build_and_evaluate_gp( xrv, @@ -193,8 +199,5 @@ end input_transform=UncertaintyQuantification.IdentityTransform(), output_transform=UncertaintyQuantification.IdentityTransform() ) - - # evaluate! returns mean as standard - @test all(test_data_uqinput_noisy[!, :y] .== test_data_uqinput_noisy[!, :y_mean]) end end \ No newline at end of file From ee0ab23721b8a1e0c1cb0707e217e0b5b8dc5cb6 Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 16:38:41 +0200 Subject: [PATCH 090/117] Fix only RandomVariables are used as gaussian process input --- src/models/gp/gaussianprocess.jl | 108 +++++++++++++++++++++++------- src/models/gp/standardization.jl | 2 +- test/models/gp/gaussianprocess.jl | 8 +-- 3 files changed, 88 insertions(+), 30 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 64d10fc0c..f41107572 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -5,7 +5,42 @@ struct GaussianProcess <: UQModel standardizer::DataStandardizer end -# ---------------- Build from DataFrame ---------------- +""" + GaussianProcess( + gp::Union{AbstractGPs.GP, NoisyGP}, + data::DataFrame, + output::Symbol; + input_transform::AbstractDataTransform = IdentityTransform(), + output_transform::AbstractDataTransform = IdentityTransform(), + optimization::AbstractHyperparameterOptimization = NoHyperparameterOptimization() + ) + +Constructs a Gaussian process model for the given data and output variable. + +# Arguments +- `gp`: A Gaussian process object, typically from `AbstractGPs`, defining the kernel and mean. +- `data`: A `DataFrame` containing the input and output data. +- `output`: The name of the output (as a `Symbol`) to be modeled as the response variable. + +# Keyword Arguments +- `input_transform`: Transformation applied to input features before fitting. + Defaults to [`IdentityTransform()`](@ref). +- `output_transform`: Transformation applied to output data before fitting. + Defaults to [`IdentityTransform()`](@ref). +- `optimization`: Strategy for hyperparameter optimization. + Defaults to `NoHyperparameterOptimization()`. 
+
+# Examples
+```jldoctest
+julia> using AbstractGPs
+
+julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3);
+
+julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]);
+
+julia> gp_model = GaussianProcess(gp, data, :y);
+```
+"""
 function GaussianProcess(
     gp::Union{AbstractGPs.GP, NoisyGP},
     data::DataFrame,
@@ -38,7 +73,50 @@ function GaussianProcess(
     )
 end
 
-# ---------------- Build from UQModel ----------------
+"""
+    GaussianProcess(
+        gp::Union{AbstractGPs.GP, NoisyGP},
+        input::Union{UQInput, Vector{<:UQInput}},
+        model::Union{UQModel, Vector{<:UQModel}},
+        output::Symbol,
+        experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments};
+        input_transform::AbstractDataTransform = IdentityTransform(),
+        output_transform::AbstractDataTransform = IdentityTransform(),
+        optimization::AbstractHyperparameterOptimization = NoHyperparameterOptimization()
+    )
+
+Constructs a Gaussian process model for the given input and model. Evaluates the model using the specified experimental design.
+
+# Arguments
+- `gp`: A Gaussian process object, typically from `AbstractGPs`, defining the kernel and mean.
+- `input`: Single input or vector of inputs. The Gaussian process will only consider inputs of type [`RandomVariable`](@ref) as input features.
+- `model`: Single model or vector of models of supertype [`UQModel`](@ref) that the Gaussian process is supposed to model.
+- `output`: The name of the output (as a `Symbol`) to be modeled as the response variable.
+- `experimentaldesign`: The strategy utilized for sampling the input variables.
+
+# Keyword Arguments
+- `input_transform`: Transformation applied to input features before fitting.
+  Defaults to [`IdentityTransform()`](@ref).
+- `output_transform`: Transformation applied to output data before fitting.
+  Defaults to [`IdentityTransform()`](@ref).
+- `optimization`: Strategy for hyperparameter optimization.
+  Defaults to `NoHyperparameterOptimization()`.
+ +# Examples +```jldoctest +julia> using AbstractGPs + +julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); + +julia> input = RandomVariable(Uniform(0, 5), :x); + +julia> model = Model(df -> sin.(df.x), :y); + +julia> design = LatinHypercubeSampling(10); + +julia> gp_model = GaussianProcess(gp, input, model, :y, design); +``` +""" function GaussianProcess( gp::Union{AbstractGPs.GP, NoisyGP}, input::Union{UQInput, Vector{<:UQInput}}, @@ -53,7 +131,8 @@ function GaussianProcess( data = sample(input, experimentaldesign) evaluate!(model, data) - # TODO: Deterministic input will break the GP kernel + # Repeated deterministic input will break the GP kernel + filter!(i -> isa(i, RandomVariable), input) # build in- and output transforms dts = DataStandardizer( @@ -115,25 +194,4 @@ function evaluate!( end return nothing -end - -# function var!(gp::GaussianProcess, data::DataFrame) -# x = gp.standardizer.fᵢ(data) -# σ² = var(gp.gp(x)) - -# column_name = Symbol(string(gp.output, "_", "var")) -# data[!, column_name] = gp.standardizer.var_fₒ⁻¹(σ²) -# return nothing -# end - -# function mean_and_var!(gp::GaussianProcess, data::DataFrame) -# x = gp.standardizer.fᵢ(data) -# μ = mean(gp.gp(x)) -# σ² = var(gp.gp(x)) - -# column_name_mean = Symbol(string(gp.output, "_", "mean")) -# column_name_var = Symbol(string(gp.output, "_", "var")) -# data[!, column_name_mean] = gp.standardizer.fₒ⁻¹(μ) -# data[!, column_name_var] = gp.standardizer.var_fₒ⁻¹(σ²) -# return nothing -# end \ No newline at end of file +end \ No newline at end of file diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index c5f9d5002..23e6ae7f1 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -139,7 +139,7 @@ end function DataStandardizer( data::DataFrame, - input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, + input::Union{Symbol, Vector{<:Symbol}, RandomVariable, Vector{<:RandomVariable}}, output::Symbol, input_transform::InputTransform, output_transform::OutputTransform diff --git a/test/models/gp/gaussianprocess.jl b/test/models/gp/gaussianprocess.jl index 0eeeb6f0d..f16561d56 100644 --- a/test/models/gp/gaussianprocess.jl +++ b/test/models/gp/gaussianprocess.jl @@ -80,9 +80,9 @@ end data = DataFrame(:x => x, :y => y) # UQInput - xrv = RandomVariable(Uniform(0, 5), :x) + xrv = [Parameter(1.5, :p), RandomVariable(Uniform(0, 5), :x)] model = Model( - df -> sin.(df.x), :y + df -> df.p .* sin.(df.x), :y ) # Test construction from DataFrame @@ -144,9 +144,9 @@ end data = DataFrame(:x1 => x[:, 1], :x2 => x[:, 2], :y => y) # UQInput - xrv = RandomVariable.([Uniform(0, 5), Uniform(0, 5)], [:x1, :x2]) + xrv = [Parameter(1.5, :p), RandomVariable(Uniform(0, 5), :x1), RandomVariable(Uniform(0, 5), :x2)] model = Model( - df -> sin.(df.x1) + cos.(df.x2), :y + df -> df.p .* sin.(df.x1) + df.p .* cos.(df.x2), :y ) # Test construction from DataFrame From c92db59e27792830347bb1b78c490e57ff73aa1e Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 17:09:58 +0200 Subject: [PATCH 091/117] Fix wrongful discarding of deterministic inputs --- src/models/gp/gaussianprocess.jl | 56 ++++++++++++++++++++++++++++++-- src/models/gp/standardization.jl | 2 +- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index f41107572..9da314d3d 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -132,11 +132,12 @@ function GaussianProcess( 
evaluate!(model, data) # Repeated deterministic input will break the GP kernel - filter!(i -> isa(i, RandomVariable), input) + random_input = filter(i -> isa(i, RandomVariable), input) # build in- and output transforms + # note: this will let the gp model extract random inputs only from any evaluation input dts = DataStandardizer( - data, input, output, + data, random_input, output, InputTransform(input_transform), OutputTransform(output_transform) ) @@ -157,7 +158,56 @@ function GaussianProcess( ) end -# what should this calculate? Calculates only mean for now +""" + evaluate!(gp::GaussianProcess, data::DataFrame; mode::Symbol = :mean, n_samples::Int = 1) + +Evaluates a fitted [`GaussianProcess`](@ref) model at the specified input locations. + +# Arguments +- `gp`: Trained Gaussian process model to be evaluated. +- `data`: A `DataFrame` containing the input locations at which predictions are computed. + +# Keyword Arguments +- `mode`: A `Symbol` specifying the type of output to return. + Supported options are: + - `:mean` - predictive mean (default) + - `:var` - predictive variance + - `:mean_and_var` - both mean and variance + - `:sample` - random samples from the predictive distribution +- `n_samples`: Number of samples to draw when `mode = :sample`. Ignored otherwise. + (Note: Sampling can be unstable when input locations are very close together, leading to numerical issues in the covariance matrix.) + +# Examples +```jldoctest +julia> using AbstractGPs + +julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); + +julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); + +julia> gp_model = GaussianProcess(gp, data, :y); + +julia> df = DataFrame(x = [0.5, 1.5, 2.5, 5.5, 8.5]); + +julia> evaluate!(gp_model, df; mode=:mean_and_var); + +julia> df.y_mean |> DisplayAs.withcontext(:compact => true) +5-element Vector{Float64}: + 0.616222 + 1.98099 + 6.93425 + 30.5658 + 68.1663 + +julia> df.y_var |> DisplayAs.withcontext(:compact => true) +5-element Vector{Float64}: + 0.125804 + 0.0143887 + 0.0080906 + 0.00622953 + 0.0080906 +``` +""" function evaluate!( gp::GaussianProcess, data::DataFrame; diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index 23e6ae7f1..c5f9d5002 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -139,7 +139,7 @@ end function DataStandardizer( data::DataFrame, - input::Union{Symbol, Vector{<:Symbol}, RandomVariable, Vector{<:RandomVariable}}, + input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, output::Symbol, input_transform::InputTransform, output_transform::OutputTransform From 9118cf49cde5db68d6323a0f0666b0832ce6f463 Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 19:51:28 +0200 Subject: [PATCH 092/117] Preliminary examples --- demo/metamodels/easyExampleGP.jl | 35 ++++++++++++++++++++++++ demo/metamodels/simpleGPexample.jl | 43 ++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 demo/metamodels/easyExampleGP.jl create mode 100644 demo/metamodels/simpleGPexample.jl diff --git a/demo/metamodels/easyExampleGP.jl b/demo/metamodels/easyExampleGP.jl new file mode 100644 index 000000000..a75ca8896 --- /dev/null +++ b/demo/metamodels/easyExampleGP.jl @@ -0,0 +1,35 @@ +using AbstractGPs +using UncertaintyQuantification +using DataFrames +using Random +using DisplayAs + +Random.seed!(42) + +x = collect(range(0, 10, 10)) +noise_var = 0.1 +y = sin.(x) + 0.3 * cos.(2 .* x) # + noise_var .* randn(length(x)) +df = 
DataFrame(x = x, y = y) + +σ² = 1e-5 +kernel = SqExponentialKernel() ∘ ScaleTransform(3.0)# ∘ ScaleTransform(1.0) +gp = with_gaussian_noise(GP(0.0, kernel), σ²) + +gpr = GaussianProcess(gp, df, :y) # ; optimization=MaximumLikelihoodEstimation() + +using Plots + +x_plot = collect(range(0, 5, 500)) +y_true = sin.(x_plot) + 0.3 * cos.(2 .* x_plot) + +prediction = DataFrame(:x => x_plot) +evaluate!(gpr, prediction; mode=:mean_and_var) + +prediction_mean = prediction[!, :y_mean] +prediction_std = sqrt.(prediction[!, :y_var]) + +plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") +plot!(x_plot, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band") + +# Optionally add ground truth function +plot!(x_plot, y_true, color=:red, label="Ground truth") \ No newline at end of file diff --git a/demo/metamodels/simpleGPexample.jl b/demo/metamodels/simpleGPexample.jl new file mode 100644 index 000000000..f2c246335 --- /dev/null +++ b/demo/metamodels/simpleGPexample.jl @@ -0,0 +1,43 @@ +using AbstractGPs +using UncertaintyQuantification +using DataFrames +using Random + +Random.seed!(42) + +x = RandomVariable(Uniform(0, 5), :x) + +σ = 0.1 +f = 3.0 +noisy_sinus = Model( + df -> sin.(f .* df.x) .+ σ .* randn(size(df.x)), :y +) +experimentaldesign = LatinHypercubeSampling(10) + +σ² = σ^2 +kernel = SqExponentialKernel() +kernel = SqExponentialKernel() + PeriodicKernel(; r=[f]) +gp = with_gaussian_noise(GP(0.0, kernel), σ²) +gp = GP(0.0, kernel) + +gpr = GaussianProcess( + gp, x, noisy_sinus, :y, experimentaldesign; + #optimization=NoHyperparameterOptimization() +) + +using Plots + +x_plot = collect(range(0, 5, 500)) +y_true = sin.(f .* x_plot) + +prediction = DataFrame(:x => x_plot) +mean_and_var!(gpr, prediction) + +prediction_mean = prediction[!, :y_mean] +prediction_std = sqrt.(prediction[!, :y_var]) + +plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") +plot!(x_plot, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band") + +# Optionally add ground truth function +plot!(x_plot, y_true, color=:red, label="Ground truth") \ No newline at end of file From 132145d31e05fafa5eee6634ea5514108bdfa9fa Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 19:51:59 +0200 Subject: [PATCH 093/117] Current state of documentation --- docs/src/manual/metamodels.md | 82 ++++++++++++++++++++++++++++++----- 1 file changed, 70 insertions(+), 12 deletions(-) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index b69b8db80..128922f6a 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -13,7 +13,7 @@ A Response Surface is a simple polynomial surrogate model. It can be trained by ## Gaussian Process Regression ### Theoretical Background -A Gaussian Process (GP) is a collection of random variables, any finite subset of which has a joint Gaussian distribution. It is fully specified by a mean function ``m(x)`` and a covariance (kernel) function ``k(x, x')``. In GP regression, we aim to model an unknown function ``f(x)``. Before observing any data, we assume that the function ``f(x)`` is distributed according to a GP: +A Gaussian Process (GP) is a collection of random variables, any finite subset of which has a joint Gaussian distribution. It is fully specified by a mean function $m(x)$ and a covariance (kernel) function $k(x, x')$. In GP regression, we aim to model an unknown function $f(x)$. 
Before observing any data, we assume that the function $f(x)$ is distributed according to a GP: ```math f(x) \sim \mathcal{G}\mathcal{P}\left( m(x), k(x, x') \right). @@ -21,31 +21,40 @@ f(x) \sim \mathcal{G}\mathcal{P}\left( m(x), k(x, x') \right). This prior GP specifies that any finite collection of function values follows a multivariate normal distribution. +To define a prior GP we use [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/) for the GP interface and mean function, and [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/) for the definition of a covariance kernel. Below, we construct a simple prior GP with a constant zero mean function and a scaled squared exponential kernel: + +```@example gaussianprocess +using AbstractGPs +kernel = SqExponentialKernel() ∘ ScaleTransform(3.0) +gp = GP(0.0, kernel) +return nothing # hide +``` + #### Posterior Gaussian Process -The posterior Gaussian Process represents the distribution of functions after incorporating observed data. We denote the observation data as: +The posterior GP represents the distribution of functions after incorporating observed data. We denote the observation data as: ```math \mathcal{D} = \lbrace (\hat{x}_i, \hat{f}_i) \mid i=1, \dots, N \rbrace, ``` -where ``\hat{f}_i = f(\hat{x}_i)`` in the noise-free observation case, and ``\hat{f}_i = f(\hat{x}_i) + \varepsilon_i`` in the noisy case, with independent noise terms ``\varepsilon_i \sim \mathcal{N}(0, \sigma_\varepsilon^2)``. Let ``\hat{X} = [\hat{x}_1, \dots, \hat{x}_N]`` denote the collection of observation data locations. The corresponding mean vector and covariance matrix are: +where $\hat{f}_i = f(\hat{x}_i)$ in the noise-free observation case, and $\hat{f}_i = f(\hat{x}_i) + e_i$ in the noisy case, with independent noise terms $e_i \sim \mathcal{N}(0, \sigma_e^2)$. Let $\hat{X} = [\hat{x}_1, \dots, \hat{x}_N]$ denote the collection of observation data locations. The corresponding mean vector and covariance matrix are: ```math \mu(\hat{X}) = [m(\hat{x}_1), \dots, m(\hat{x}_N)], \quad K(\hat{X}, \hat{X}) \text{ with entries } K_{ij} = k(\hat{x}_i, \hat{x}_j). ``` -For a new input location ``x^*`` we are interested at the unknown function value ``f^* = f(x^*)``. By the definition of a GP, the joint distribution of observed outputs ``\hat{f}_i`` and the unknown ``f^*`` is multivariate Gaussian: +For a new input location $x^*$ we are interested at the unknown function value $f^* = f(x^*)$. By the definition of a GP, the joint distribution of observed outputs $\hat{f}_i$ and the unknown $f^*$ is multivariate Gaussian: ```math \begin{bmatrix} \hat{f}\\ f^* \end{bmatrix} = \mathcal{N}\left( \begin{bmatrix} \mu(\hat{X}) \\ m(x^*) \end{bmatrix}, \begin{bmatrix} K(\hat{X}, \hat{X}) & K(\hat{X}, x^*)\\ K(x^*, \hat{X}) & K(x^*, x^*) \end{bmatrix} \right), ``` where: -- ``K(\hat{X}, \hat{X})`` is the covariance matrix with entries ``K_{ij} = k(\hat{x}_i, \hat{x}_j)``, -- ``K(\hat{X}, x^*)`` is the covariance matrix with entries ``K_{i1} = k(\hat{x}_i, x^*)``, -- and ``K(x^*, x^*)`` is the variance at the unknown input location. +- $K(\hat{X}, \hat{X})$ is the covariance matrix with entries $K_{ij} = k(\hat{x}_i, \hat{x}_j)$, +- $K(\hat{X}, x^*)$ is the covariance matrix with entries $K_{i1} = k(\hat{x}_i, x^*)$, +- and $K(x^*, x^*)$ is the variance at the unknown input location. -We can then obtain the posterior distribution of ``f^*`` from the properties of multivariate Gaussian distributions (see, e.g. 
Appendix A.2 in [rasmussen2005gaussian](@cite)), by conditioning the joint Gaussian on the observed outputs ``\hat{f}_i``: +We can then obtain the posterior distribution of $f^*$ from the properties of multivariate Gaussian distributions (see, e.g. Appendix A.2 in [rasmussen2005gaussian](@cite)), by conditioning the joint Gaussian on the observed outputs $\hat{f}_i$: ```math f^* \mid \hat{X}, \hat{f}, x^* \sim \mathcal{N}(\mu^*(x^*), \Sigma^*(x^*)), @@ -58,15 +67,64 @@ with \Sigma^*(x^*) = K(x^*, x^*) - K(x^*, \hat{X})K(\hat{X}, \hat{X})^{-1}K(\hat{X}, x^*). ``` -In the noisy observation case, the covariance between training points is adjusted by adding the noise variance:: +In the noisy observation case, the covariance between training points is adjusted by adding the noise variance: ```math -K(\hat{X}, \hat{X}) \rightarrow K(\hat{X}, \hat{X}) + \sigma^2_{\varepsilon}I. +K(\hat{X}, \hat{X}) \rightarrow K(\hat{X}, \hat{X}) + \sigma^2_{e}I. ``` The computation of the posterior predictive distribution generalizes straightforwardly to multiple input locations, providing both the posterior mean, which can serve as a regression estimate of the unknown function, and the posterior variances, which quantify the uncertainty at each point. Because the posterior is multivariate Gaussian, one can also sample function realizations at specified locations to visualize possible functions consistent with the observed data. +To construct a posterior GP from our previously defined prior GP, we need to define training data in form of a `DataFrame`. Constructing a `GaussianProcess` model will then automatically compute the posterior GP to predict requested the modeled output $y$. In this example, we equip the prior GP with a small Gaussian observation noise with zero mean and variance $\sigma^2_{e}=\sigma^2$, which improves the numerical stability of the covariance matrix. + +```@example gaussianprocess +using DataFrames # hide +x = collect(range(0, 10, 10)) +y = sin.(x) + 0.3 * cos.(2 .* x) +df = DataFrame(x = x, y = y) + +σ² = 1e-5 +gp = with_gaussian_noise(gp, σ²) +posterior_gp = GaussianProcess(gp, df, :y) +return nothing # hide +``` + +```@example gaussianprocess +using Plots # hide +x_plot = collect(range(0, 5, 500)) # hide +y_true = sin.(x_plot) + 0.3 * cos.(2 .* x_plot) # hide + +prediction = DataFrame(:x => x_plot) # hide +evaluate!(posterior_gp, prediction; mode=:mean_and_var) # hide +prediction_mean = prediction[!, :y_mean] # hide +prediction_std = sqrt.(prediction[!, :y_var]) # hide + +p = plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") # hide +plot!( + x_plot, prediction_mean, ribbon=2 .* prediction_std, + color=:grey, alpha=0.5, label="Confidence band" +) # hide +plot!(x_plot, y_true, color=:red, label="True function") # hide + +savefig(p, "posterior-gp.svg"); # hide +return nothing # hide +``` +![](posterior-gp.svg) + #### Hyperparameter optimization -The GP prior, together with the observed data, defines a posterior distribution over functions that captures predictions at new inputs, including uncertainty. The noise-free and noisy cases differ only in the posterior covariance, which incorporates the observation noise when present. +Gaussian process models typically contain hyperparameters in their mean functions $m(x; \theta_m)$ and covariance kernel functions $k(x, x'; \theta_k)$. The observation noise variance $\sigma^2_{e}$ is also considered a hyperparameter related to the kernel. The choice of hyperparameters strongly affects the quality of the posterior GP. 
A common approach to selecting hyperparameters is maximum likelihood estimation (MLE) (see, e.g. [rasmussen2005gaussian](@cite)), where we maximize the likelihood of observing the training data $\mathcal{D}$ under the chosen GP prior. + +The marginal likelihood of the observed training outputs $\hat{f}$ is: + +```math +p(\hat{f} \mid \hat{X}, \theta_m, \theta_k, \sigma^2_{e}) = \mathcal{N}(\hat{f} \mid \mu_{\theta_m}(\hat{X}), K_{\theta_k}(\hat{X}, \hat{X}) + \sigma^2_{e}I), +``` + +where $\mu_{\theta_m}(\hat{X})$ and $K_{\theta_k}(\hat{X}, \hat{X})$ denote the parameter dependent versions of the previously defined quantities. For numerical reasons, the logarithm of the marginal likelihood is typically used. Maximizing the log marginal likelihood with respect to the hyperparameters then yields the parameters that best explain the observed data. After obtaining the optimal hyperparamters, the posterior GP can be constructed as described above. + +- Custom mean functions [`CustomMean`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#AbstractGPs.CustomMean) +- Multi-output kernels [`MOKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#Multi-output-Kernels) +- Neural kernel networks [`NeuralKernelNetwork`] +- FunctionTransform +- GibbsKernel -### Constructing A Gaussian Process Regression Model \ No newline at end of file From 5e7a74511222f57cb84a7937cb0b8f4b7c8aa494 Mon Sep 17 00:00:00 2001 From: felixmett Date: Mon, 13 Oct 2025 19:54:21 +0200 Subject: [PATCH 094/117] Current state of docs --- docs/src/manual/metamodels.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index 128922f6a..3e622ccc5 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -24,7 +24,9 @@ This prior GP specifies that any finite collection of function values follows a To define a prior GP we use [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/) for the GP interface and mean function, and [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/) for the definition of a covariance kernel. 
Below, we construct a simple prior GP with a constant zero mean function and a scaled squared exponential kernel: ```@example gaussianprocess +using UncertaintyQuantification # hide using AbstractGPs + kernel = SqExponentialKernel() ∘ ScaleTransform(3.0) gp = GP(0.0, kernel) return nothing # hide From 3a1906b969a814f8cd29714165adb2b976a587f8 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 11:07:36 +0200 Subject: [PATCH 095/117] Finish gaussian process documentation --- docs/src/manual/metamodels.md | 70 +++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index 3e622ccc5..064032fcf 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -87,26 +87,30 @@ df = DataFrame(x = x, y = y) σ² = 1e-5 gp = with_gaussian_noise(gp, σ²) -posterior_gp = GaussianProcess(gp, df, :y) +gp_model = GaussianProcess(gp, df, :y) return nothing # hide ``` +Now we can use our GP model to predict at new input locations `x_test`: + ```@example gaussianprocess using Plots # hide -x_plot = collect(range(0, 5, 500)) # hide -y_true = sin.(x_plot) + 0.3 * cos.(2 .* x_plot) # hide +x_test = collect(range(0, 5, 500)) +prediction = DataFrame(:x => x_test) + +evaluate!(gp_model, prediction; mode=:mean_and_var) -prediction = DataFrame(:x => x_plot) # hide -evaluate!(posterior_gp, prediction; mode=:mean_and_var) # hide prediction_mean = prediction[!, :y_mean] # hide prediction_std = sqrt.(prediction[!, :y_var]) # hide -p = plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") # hide +p = plot(x_test, prediction_mean, color=:blue, label="Mean prediction") # hide plot!( - x_plot, prediction_mean, ribbon=2 .* prediction_std, + x_test, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band" ) # hide -plot!(x_plot, y_true, color=:red, label="True function") # hide + +y_true = sin.(x_test) + 0.3 * cos.(2 .* x_test) # hide +plot!(x_test, y_true, color=:red, label="True function") # hide savefig(p, "posterior-gp.svg"); # hide return nothing # hide @@ -114,7 +118,9 @@ return nothing # hide ![](posterior-gp.svg) #### Hyperparameter optimization -Gaussian process models typically contain hyperparameters in their mean functions $m(x; \theta_m)$ and covariance kernel functions $k(x, x'; \theta_k)$. The observation noise variance $\sigma^2_{e}$ is also considered a hyperparameter related to the kernel. The choice of hyperparameters strongly affects the quality of the posterior GP. A common approach to selecting hyperparameters is maximum likelihood estimation (MLE) (see, e.g. [rasmussen2005gaussian](@cite)), where we maximize the likelihood of observing the training data $\mathcal{D}$ under the chosen GP prior. +GP models typically contain hyperparameters in their mean functions $m(x; \theta_m)$ and covariance kernel functions $k(x, x'; \theta_k)$. The observation noise variance $\sigma^2_{e}$ is also considered a hyperparameter related to the kernel. The choice of hyperparameters strongly affects the quality of the posterior GP. + +A common approach to selecting hyperparameters is maximum likelihood estimation (MLE) (see, e.g. [rasmussen2005gaussian](@cite)), where we maximize the likelihood of observing the training data $\mathcal{D}$ under the chosen GP prior. 
 The marginal likelihood of the observed training outputs $\hat{f}$ is:
 
 ```math
 p(\hat{f} \mid \hat{X}, \theta_m, \theta_k, \sigma^2_{e}) = \mathcal{N}(\hat{f} \mid \mu_{\theta_m}(\hat{X}), K_{\theta_k}(\hat{X}, \hat{X}) + \sigma^2_{e}I),
 ```
 
-where $\mu_{\theta_m}(\hat{X})$ and $K_{\theta_k}(\hat{X}, \hat{X})$ denote the parameter dependent versions of the previously defined quantities. For numerical reasons, the logarithm of the marginal likelihood is typically used. Maximizing the log marginal likelihood with respect to the hyperparameters then yields the parameters that best explain the observed data. After obtaining the optimal hyperparamters, the posterior GP can be constructed as described above.
+where $\mu_{\theta_m}(\hat{X})$ and $K_{\theta_k}(\hat{X}, \hat{X})$ denote the parameter-dependent versions of the previously defined quantities.
+
+For numerical reasons, the logarithm of the marginal likelihood is typically used. Maximizing the log marginal likelihood with respect to the hyperparameters then yields the parameters that best explain the observed data. After obtaining the optimal hyperparameters, the posterior GP can be constructed as described above.
+
+To optimize the hyperparameters of our GP model before computing the posterior GP, we can pass a gradient-based optimizer provided by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/) to the `GaussianProcess` constructor:
+
+```@example gaussianprocess
+gp_model = GaussianProcess(gp, df, :y; optimization=MaximumLikelihoodEstimation())
+
+prediction = DataFrame(:x => x_test)
+evaluate!(gp_model, prediction; mode=:mean_and_var)
+
+prediction_mean = prediction[!, :y_mean] # hide
+prediction_std = sqrt.(prediction[!, :y_var]) # hide
+
+p = plot(x_test, prediction_mean, color=:blue, label="Mean prediction") # hide
+plot!(
+    x_test, prediction_mean, ribbon=2 .* prediction_std,
+    color=:grey, alpha=0.5, label="Confidence band"
+) # hide
+plot!(x_test, y_true, color=:red, label="True function") # hide
+
+savefig(p, "posterior-gp-optimized.svg"); # hide
+return nothing # hide
+```
+![](posterior-gp-optimized.svg)
+
+Internally, `MaximumLikelihoodEstimation()` defaults to the [`LBFGS`](https://julianlsolvers.github.io/Optim.jl/stable/algo/lbfgs/) optimizer, which performs 10 optimization steps with the default optimization settings defined in [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/). Note that any other first-order optimizer supported by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/), along with its corresponding hyperparameters, can also be used when constructing [`MaximumLikelihoodEstimation`](@ref).
+
+During optimization, GP hyperparameters $\theta_m, \theta_k$ and $\sigma^2_{e}$ are automatically extracted and updated.
+
+We support the automatic extraction of hyperparameters from mean functions provided by [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#Mean-functions), with the exception of:
+- Custom mean functions [`CustomMean`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#AbstractGPs.CustomMean). These are defined with a custom function that itself could depend on hyperparameters. These additional hyperparameters are ignored in the optimization.
+
+Kernel functions are defined with the kernels and transformations provided by [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/).
For similar reasons as with `CustomMean`, we do not extract potential function hyperparameters from the following kernels or transforms: +- Transforms defined with custom functions [`FunctionTransform`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/transform/#KernelFunctions.FunctionTransform), +- The [`GibbsKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#KernelFunctions.GibbsKernel), which models a kernel lengthscale parameter with the help of a function. -- Custom mean functions [`CustomMean`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#AbstractGPs.CustomMean) -- Multi-output kernels [`MOKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#Multi-output-Kernels) -- Neural kernel networks [`NeuralKernelNetwork`] -- FunctionTransform -- GibbsKernel +Further, GP models containing the following kernels are not supported for hyperparameter optimization currently: +- Multi-output kernels [`MOKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#Multi-output-Kernels), +- Neural kernel networks [`NeuralKernelNetwork`]. From 78967125c930329f8c85d42c3512bcf9c905cd40 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 13:52:07 +0200 Subject: [PATCH 096/117] Delete preliminary testing files --- demo/metamodels/easyExampleGP.jl | 35 -------- demo/metamodels/gaussianprocess.jl | 51 ----------- demo/metamodels/simpleGPexample.jl | 43 ---------- demo/metamodels/test.jl | 130 ----------------------------- 4 files changed, 259 deletions(-) delete mode 100644 demo/metamodels/easyExampleGP.jl delete mode 100644 demo/metamodels/gaussianprocess.jl delete mode 100644 demo/metamodels/simpleGPexample.jl delete mode 100644 demo/metamodels/test.jl diff --git a/demo/metamodels/easyExampleGP.jl b/demo/metamodels/easyExampleGP.jl deleted file mode 100644 index a75ca8896..000000000 --- a/demo/metamodels/easyExampleGP.jl +++ /dev/null @@ -1,35 +0,0 @@ -using AbstractGPs -using UncertaintyQuantification -using DataFrames -using Random -using DisplayAs - -Random.seed!(42) - -x = collect(range(0, 10, 10)) -noise_var = 0.1 -y = sin.(x) + 0.3 * cos.(2 .* x) # + noise_var .* randn(length(x)) -df = DataFrame(x = x, y = y) - -σ² = 1e-5 -kernel = SqExponentialKernel() ∘ ScaleTransform(3.0)# ∘ ScaleTransform(1.0) -gp = with_gaussian_noise(GP(0.0, kernel), σ²) - -gpr = GaussianProcess(gp, df, :y) # ; optimization=MaximumLikelihoodEstimation() - -using Plots - -x_plot = collect(range(0, 5, 500)) -y_true = sin.(x_plot) + 0.3 * cos.(2 .* x_plot) - -prediction = DataFrame(:x => x_plot) -evaluate!(gpr, prediction; mode=:mean_and_var) - -prediction_mean = prediction[!, :y_mean] -prediction_std = sqrt.(prediction[!, :y_var]) - -plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") -plot!(x_plot, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band") - -# Optionally add ground truth function -plot!(x_plot, y_true, color=:red, label="Ground truth") \ No newline at end of file diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl deleted file mode 100644 index 15f6a5b70..000000000 --- a/demo/metamodels/gaussianprocess.jl +++ /dev/null @@ -1,51 +0,0 @@ -using UncertaintyQuantification -using AbstractGPs -using Random -using Optim - - -# Setup Himmelblau example -x = RandomVariable.(Uniform(-5, 5), [:x1, :x2]) -himmelblau = Model( - df -> (df.x1 .^ 2 .+ df.x2 .- 11) .^ 2 .+ (df.x1 .+ df.x2 .^ 2 .- 7) .^ 2, :y 
-) -design = LatinHypercubeSampling(100) -training_data = sample(x, design) -evaluate!(himmelblau, training_data) - -# Setup the GP -σ² = 1e-5 -kernel = SqExponentialKernel() ∘ ARDTransform([0.5, 0.5]) -gp = with_gaussian_noise(GP(0.0, kernel), σ²) - -optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; iterations=1000, show_trace=false)) -# optimizer = MaximumLikelihoodEstimation(Optim.LBFGS(), Optim.Options(; iterations=10, show_trace=false)) - -gpr = GaussianProcess( - gp, x, himmelblau, :y, design; input_transform=ZScoreTransform(), output_transform=ZScoreTransform(), optimization=optimizer -) - -test_data = sample(x, 1000) -evaluate!(gpr, test_data) - -p_data = test_data[:, [:x1, :x2]] -evaluate!(himmelblau, p_data) - -mse = mean((p_data.y .- test_data.y) .^ 2) -println("MSE is: $mse") - -using Plots -using DataFrames -# SNSInputTransform will crash the plotting routine on -5 and 5 values -a = range(-5, 5; length=1000) -b = range(5, -5; length=1000) -himmelblau_f(x1, x2) = (x1^2 + x2 - 11)^2 + (x1 + x2^2 - 7)^2 -function gpr_f(x, y) - df = DataFrame(x1 = x, x2 = y) - evaluate!(gpr, df) - return only(df[:, :y]) -end - -s1 = surface(a, b, himmelblau_f; plot_title="Himmelblau's function") -s2 = surface(a, b, gpr_f; plot_title="Gaussian process regression") -plot(s1, s2, layout = (1, 2), legend = false) \ No newline at end of file diff --git a/demo/metamodels/simpleGPexample.jl b/demo/metamodels/simpleGPexample.jl deleted file mode 100644 index f2c246335..000000000 --- a/demo/metamodels/simpleGPexample.jl +++ /dev/null @@ -1,43 +0,0 @@ -using AbstractGPs -using UncertaintyQuantification -using DataFrames -using Random - -Random.seed!(42) - -x = RandomVariable(Uniform(0, 5), :x) - -σ = 0.1 -f = 3.0 -noisy_sinus = Model( - df -> sin.(f .* df.x) .+ σ .* randn(size(df.x)), :y -) -experimentaldesign = LatinHypercubeSampling(10) - -σ² = σ^2 -kernel = SqExponentialKernel() -kernel = SqExponentialKernel() + PeriodicKernel(; r=[f]) -gp = with_gaussian_noise(GP(0.0, kernel), σ²) -gp = GP(0.0, kernel) - -gpr = GaussianProcess( - gp, x, noisy_sinus, :y, experimentaldesign; - #optimization=NoHyperparameterOptimization() -) - -using Plots - -x_plot = collect(range(0, 5, 500)) -y_true = sin.(f .* x_plot) - -prediction = DataFrame(:x => x_plot) -mean_and_var!(gpr, prediction) - -prediction_mean = prediction[!, :y_mean] -prediction_std = sqrt.(prediction[!, :y_var]) - -plot(x_plot, prediction_mean, color=:blue, label="Mean prediction") -plot!(x_plot, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band") - -# Optionally add ground truth function -plot!(x_plot, y_true, color=:red, label="Ground truth") \ No newline at end of file diff --git a/demo/metamodels/test.jl b/demo/metamodels/test.jl deleted file mode 100644 index d20377744..000000000 --- a/demo/metamodels/test.jl +++ /dev/null @@ -1,130 +0,0 @@ -using ParameterHandling -using AbstractGPs - - -struct Parameterized{T} - object::T -end - -function (p::Parameterized)(θ) - return apply_parameters(p.object, ParameterHandling.value(θ)) -end - -parameterize(object) = Parameterized(object), extract_parameters(object) - -extract_parameters(f::GP) = (extract_parameters(f.mean), extract_parameters(f.kernel)) -function apply_parameters(f::GP, θ) - return GP(apply_parameters(f.mean, θ[1]), apply_parameters(f.kernel, θ[2])) -end - -extract_parameters(m::ConstMean) = m.c -apply_parameters(::ConstMean, θ) = ConstMean(θ) - -KernelsWithoutParameters = 
Union{SEKernel,Matern32Kernel,Matern52Kernel,WhiteKernel} - -extract_parameters(::T) where {T<:KernelsWithoutParameters} = nothing -apply_parameters(k::T, θ) where {T<:KernelsWithoutParameters} = k - -# ------------------------------------------ - -extract_parameters(k::TransformedKernel) = (extract_parameters(k.kernel), extract_parameters(k.transform)) -apply_parameters(k::TransformedKernel, θ) = TransformedKernel( - apply_parameters(k.kernel, θ[1]), apply_parameters(k.transform, θ[2]) - ) - -extract_parameters(k::PeriodicKernel) = ParameterHandling.positive(k.r) -apply_parameters(::PeriodicKernel, θ) = PeriodicKernel(; r=θ) - -extract_parameters(t::ARDTransform) = ParameterHandling.positive(t.v) -apply_parameters(::ARDTransform, θ) = ARDTransform(θ) - -extract_parameters(t::LinearTransform) = t.A -apply_parameters(::LinearTransform, θ) = LinearTransform(θ) - -extract_parameters(k::KernelSum) = map(extract_parameters, k.kernels) -apply_parameters(k::KernelSum, θ) = KernelSum(map(apply_parameters, k.kernels, θ)) - -kernel = PeriodicKernel(2) -kernel = PeriodicKernel(1) - -transform = LinearTransform(rand(2,2)) - -extract_parameters(kernel) -extract_parameters(transform) - -A = rand(2,2) -kernel = SqExponentialKernel() ∘ LinearTransform(A) -kernel = (SqExponentialKernel() ∘ ARDTransform([1.0, 1.0])) ⊗ (SqExponentialKernel() ∘ LinearTransform(A)) -gp = GP(0.0, kernel) - -params = extract_parameters(gp) -model, θ = parameterize(gp) -θ_flat, unflatten = ParameterHandling.flatten(θ) -gp_model = model(unflatten(θ_flat)) - - -struct FixedTransform{T} <: Transform - component::T -end - -struct FixedKernel{T} <: Kernel - component::T -end - -fixed(t::Transform) = FixedTransform(t) -fixed(k::Kernel) = FixedKernel(k) - -extract_parameters(c::FixedTransform) = ParameterHandling.fixed(extract_parameters(c.component)) -apply_parameters(c::FixedTransform, θ) = apply_parameters(c.component, θ) - -extract_parameters(c::FixedKernel) = ParameterHandling.fixed(extract_parameters(c.component)) -apply_parameters(c::FixedKernel, θ) = apply_parameters(c.component, θ) - -A = rand(2,2) -kernel = fixed((SqExponentialKernel() ∘ LinearTransform(A)) + (SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]))) -gp = GP(0.0, kernel) - -params = extract_parameters(gp) -model, θ = parameterize(gp) -θ_flat, unflatten = ParameterHandling.flatten(θ) -gp_model = model(unflatten(θ_flat)) - -function collect_concrete_kernels(T::Type) - result = Set{Type}() - - function recurse(t) - for s in subtypes(t) - if isabstracttype(s) - recurse(s) # dive into abstract types - else - push!(result, s) # collect concrete type - end - end - end - - recurse(T) - return collect(result) -end - -# Retrieve all kernel types -all_kernels = collect_concrete_kernels(KernelFunctions.Kernel) -for k in all_kernels - println(k) -end -println("Found $(length(all_kernels)) concrete kernel types:") - -all_transforms = all_concrete_subtypes(KernelFunctions.Transform) -println("Found $(length(all_transforms)) concrete transformation types.") - -# Retrieve all transformation types -transform_types = all_transform_types() -println("Found $(length(transform_types)) transformation types.") - -for KT in all_kernel_types - try - obj = KT() # maybe you need default constructors - extract_parameters(obj) - catch e - println("Missing extract_parameters for $KT: $e") - end -end \ No newline at end of file From 3679ee068e119ec7d3d8cf20a60f808fcf426ffd Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 13:52:40 +0200 Subject: [PATCH 097/117] Fix gaussian process 
 output name for mode :mean

---
 src/models/gp/gaussianprocess.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl
index 9da314d3d..25b7203ba 100644
--- a/src/models/gp/gaussianprocess.jl
+++ b/src/models/gp/gaussianprocess.jl
@@ -220,7 +220,7 @@ function evaluate!(
     if mode === :mean
         μ = mean(finite_projection)
         col = Symbol(string(gp.output, "_mean"))
-        data[!, gp.output] = gp.standardizer.fₒ⁻¹(μ)
+        data[!, col] = gp.standardizer.fₒ⁻¹(μ)
     elseif mode === :var
         σ² = var(finite_projection)
         col = Symbol(string(gp.output, "_var"))

From ca8df9d6c696282fcce77fed71ad0a663f22b92e Mon Sep 17 00:00:00 2001
From: Cr0gan
Date: Tue, 14 Oct 2025 13:53:07 +0200
Subject: [PATCH 098/117] Add literate example for gaussian process regression

---
 docs/literate/metamodels/gaussianprocess.jl | 130 ++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 docs/literate/metamodels/gaussianprocess.jl

diff --git a/docs/literate/metamodels/gaussianprocess.jl b/docs/literate/metamodels/gaussianprocess.jl
new file mode 100644
index 000000000..4afd16506
--- /dev/null
+++ b/docs/literate/metamodels/gaussianprocess.jl
@@ -0,0 +1,130 @@
+#===
+# Gaussian Process Regression
+
+## Himmelblau's Function
+
+In this example, we will model the following test function (known as Himmelblau's function) in the range ``x1, x2 ∈ [-5, 5]`` with a Gaussian process (GP) regression model.
+
+It is defined as:
+
+ ```math
+f(x1, x2) = (x1^2 + x2 - 11)^2 + (x1 + x2^2 - 7)^2.
+```
+===#
+# ![](himmelblau.svg)
+#===
+As in the response surface example, we create an array of random variables that will be used when evaluating the points that our experimental design produces.
+===#
+
+using UncertaintyQuantification
+
+x = RandomVariable.(Uniform(-5, 5), [:x1, :x2])
+
+himmelblau = Model(
+    df -> (df.x1 .^ 2 .+ df.x2 .- 11) .^ 2 .+ (df.x1 .+ df.x2 .^ 2 .- 7) .^ 2, :y
+)
+
+#===
+Next, we choose an experimental design. In this example, we use a `LatinHypercubeSampling` design from which we draw 80 samples to train our model:
+===#
+
+design = LatinHypercubeSampling(80)
+
+#===
+After that, we construct a prior GP model. Here we assume a constant mean of 0.0 and a squared exponential kernel with automatic relevance determination (ARD).
+We also assume a small Gaussian noise term in the observations for numerical stability:
+===#
+
+using AbstractGPs
+
+mean_f = ConstMean(0.0)
+kernel = SqExponentialKernel() ∘ ARDTransform([1.0, 1.0])
+σ² = 1e-5
+
+gp_prior = with_gaussian_noise(GP(mean_f, kernel), σ²)
+
+#===
+Next, we set up an optimizer used in the log marginal likelihood maximization to find the optimal hyperparameters of our GP model. Here we use the Adam optimizer from the `Optim.jl` package with a learning rate of 0.005 and run it for 10 iterations:
+===#
+using Optim
+
+optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.005), Optim.Options(; iterations=10, show_trace=false))
+
+#===
+Finally, we define an input standardization (here a z-score transform). While not strictly necessary for this example, standardization can help with finding good hyperparameters.
+Note that we can also define an output transform to scale the output for training the GP. When evaluating the GP model, the input will be automatically transformed with the fitted standardization.
+The output will be transformed back to the original scale automatically as well.
+===#
+
+input_transform = ZScoreTransform()
+
+#===
+The GP regression model is now constructed by calling the `GaussianProcess` constructor with the prior GP, the input random variables, the model, the output symbol, the experimental design, and the optional input and output transforms as well as the hyperparameter optimization method.
+The construction then samples the experimental design, evaluates the model at the sampled points, standardizes the input and output data, optimizes the hyperparameters of the GP, and constructs the posterior GP.
+===#
+#md using Random #hide
+#md Random.seed!(42) #hide
+
+gp_model = GaussianProcess(
+    gp_prior,
+    x,
+    himmelblau,
+    :y,
+    design;
+    input_transform=input_transform,
+    optimization=optimizer
+)
+
+#===
+To evaluate the `GaussianProcess`, use `evaluate!(gp::GaussianProcess, data::DataFrame)` with the `DataFrame` containing the points you want to evaluate.
+The evaluation of a GP is not unique, and we can choose to evaluate the mean prediction, the prediction variance, a combination of both, or draw samples from the posterior distribution.
+The default is to evaluate the mean prediction.
+We can specify the evaluation mode via the `mode` keyword argument. Supported options are:
+- `:mean` - predictive mean (default)
+- `:var` - predictive variance
+- `:mean_and_var` - both mean and variance
+- `:sample` - random samples from the predictive distribution
+===#
+
+test_data = sample(x, 1000)
+evaluate!(gp_model, test_data; mode=:mean_and_var)
+
+#===
+The mean prediction of our model in this case has an MSE of about 65 and looks like this in comparison to the original:
+===#
+
+#md using Plots #hide
+#md using DataFrames #hide
+#md a = range(-5, 5; length=200) #hide
+#md b = range(-5, 5; length=200) #hide
+#md A = repeat(collect(a)', length(b), 1) #hide
+#md B = repeat(collect(b), 1, length(a)) #hide
+#md df = DataFrame(x1 = vec(A), x2 = vec(B)) #hide
+#md evaluate!(gpr, df; mode=:mean_and_var) #hide
+#md evaluate!(himmelblau, df) #hide
+#md gp_mean = reshape(df[:, :y_mean], length(b), length(a)) #hide
+#md gp_var = reshape(df[:, :y_var], length(b), length(a)) #hide
+#md himmelblau_f = reshape(df[:, :y], length(b), length(a)) #hide
+#md s1 = surface(a, b, himmelblau_f; plot_title="Himmelblau's function")
+#md s2 = surface(a, b, gp_mean; plot_title="GP posterior mean")
+#md plot(s1, s2, layout = (1, 2), legend = false)
+#md savefig("gp-mean-comparison.svg") # hide
+#md s3 = surface(a, b, gp_var; plot_title="GP posterior variance") # hide
+#md plot(s3, legend = false) #hide
+#md savefig("gp-variance.svg"); nothing # hide
+
+# ![](gp-mean-comparison.svg)
+
+#===
+Note that the MSE is significantly higher than that of the response surface model (which achieves an MSE of about 1e-26).
+However, the GP model also provides a measure of uncertainty in its predictions via the predictive variance.
+===# + +# ![](gp-variance.svg) + +#jl test_data = sample(x, 1000) +#jl evaluate!(gp_model, test_data) +#jl evaluate!(himmelblau, test_data) + +#jl mse = mean((test_data.y .- test_data.y_mean) .^ 2) +#jl println("MSE is: $mse") From d0b37491740689bafc92de67e39cdb056aed0b4c Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 13:53:59 +0200 Subject: [PATCH 099/117] Delete unused exports --- src/UncertaintyQuantification.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index ed3230383..1f52fb22d 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -164,7 +164,6 @@ export evaluate! export gradient export gradient_in_standard_normal_space export mean -# export mean_and_var! export multivariate_indices export periodogram export polynomialchaos @@ -180,7 +179,6 @@ export to_copula_space export to_physical_space! export to_standard_normal_space export to_standard_normal_space! -# export var! export with_gaussian_noise include("inputs/empiricaldistribution.jl") From 1fc82ee2257722ea7df25d5d03fcc8a545d4cd64 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 14:02:39 +0200 Subject: [PATCH 100/117] Add execution of gaussian process tests --- test/runtests.jl | 98 ++++++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 0d80e6519..b1a9533f8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,61 +10,63 @@ using StatsBase: fit, Histogram, corkendall using Test using UncertaintyQuantification -# include("inputs/empiricaldistribution.jl") -# include("dynamics/psd.jl") -# include("inputs/parameter.jl") -# include("inputs/jointdistribution.jl") -# include("inputs/imprecise/interval.jl") -# include("inputs/imprecise/p-box.jl") -# include("inputs/randomvariables/randomvariable.jl") -# include("inputs/randomvariables/distributionparameters.jl") -# include("inputs/jointdistribution.jl"); -# include("inputs/inputs.jl") -# include("inputs/copulas/gaussian.jl") -# include("inputs/stochasticprocesses/spectralrepresentation.jl") -# include("inputs/stochasticprocesses/models.jl") +include("inputs/empiricaldistribution.jl") +include("dynamics/psd.jl") +include("inputs/parameter.jl") +include("inputs/jointdistribution.jl") +include("inputs/imprecise/interval.jl") +include("inputs/imprecise/p-box.jl") +include("inputs/randomvariables/randomvariable.jl") +include("inputs/randomvariables/distributionparameters.jl") +include("inputs/jointdistribution.jl"); +include("inputs/inputs.jl") +include("inputs/copulas/gaussian.jl") +include("inputs/stochasticprocesses/spectralrepresentation.jl") +include("inputs/stochasticprocesses/models.jl") -# include("models/external/solvers.jl") -# include("models/external/externalmodel.jl") +include("models/external/solvers.jl") +include("models/external/externalmodel.jl") include("models/gp/gaussianprocess.jl") +include("models/gp/hyperparametertuning.jl") +include("models/gp/parameterization.jl") include("models/gp/standardization.jl") -# include("models/model.jl") -# include("models/polyharmonicspline.jl") -# include("models/pce/pcebases.jl") -# include("models/pce/polynomialchaosexpansion.jl") -# include("models/responsesurface.jl") -# include("models/imprecise/propagation.jl") +include("models/model.jl") +include("models/polyharmonicspline.jl") +include("models/pce/pcebases.jl") +include("models/pce/polynomialchaosexpansion.jl") 
+include("models/responsesurface.jl") +include("models/imprecise/propagation.jl") -# include("modelupdating/bayesianupdating.jl") -# include("modelupdating/bayesianMAP.jl") +include("modelupdating/bayesianupdating.jl") +include("modelupdating/bayesianMAP.jl") -# include("reliability/form.jl") -# include("reliability/probabilityoffailure.jl") -# include("reliability/probabilityoffailure_imprecise.jl") +include("reliability/form.jl") +include("reliability/probabilityoffailure.jl") +include("reliability/probabilityoffailure_imprecise.jl") -# include("sensitivity/gradient.jl") -# include("sensitivity/sobolindices.jl") +include("sensitivity/gradient.jl") +include("sensitivity/sobolindices.jl") -# include("simulations/doe.jl") -# include("simulations/montecarlo.jl") -# include("simulations/subset.jl") +include("simulations/doe.jl") +include("simulations/montecarlo.jl") +include("simulations/subset.jl") -# include("util/fourier-transform.jl") +include("util/fourier-transform.jl") -# if Sys.islinux() -# HPC = false -# HPC_account = "HPC_account_1" -# HPC_partition = "CPU_partition" -# if "HPC" in ARGS -# HPC = true -# HPC_account = ARGS[2] -# HPC_partition = ARGS[3] -# @warn "Running a slurm test with HPC=ON, using account $HPC_account and partition $HPC_partition. Several (20) small 1-task calculations will be submitted to slurm for testing in different job array configuations." -# end +if Sys.islinux() + HPC = false + HPC_account = "HPC_account_1" + HPC_partition = "CPU_partition" + if "HPC" in ARGS + HPC = true + HPC_account = ARGS[2] + HPC_partition = ARGS[3] + @warn "Running a slurm test with HPC=ON, using account $HPC_account and partition $HPC_partition. Several (20) small 1-task calculations will be submitted to slurm for testing in different job array configuations." 
+ end -# if HPC == false && !occursin("test/test_utilities", ENV["PATH"]) -# @warn "For slurm test to pass on Linux, test_utilities/sbatch must be added to PATH" -# @warn "sbatch command line tool may use the fake test_utilities/sbatch" -# end -# include("hpc/slurm.jl") -# end + if HPC == false && !occursin("test/test_utilities", ENV["PATH"]) + @warn "For slurm test to pass on Linux, test_utilities/sbatch must be added to PATH" + @warn "sbatch command line tool may use the fake test_utilities/sbatch" + end + include("hpc/slurm.jl") +end From b5369ff18425aa9a9a043d3911c1d5967ca23fac Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:43:49 +0200 Subject: [PATCH 101/117] Reexport AbstractGPs --- src/UncertaintyQuantification.jl | 2 +- test/runtests.jl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/UncertaintyQuantification.jl b/src/UncertaintyQuantification.jl index c47af7e76..c4d2e54e4 100644 --- a/src/UncertaintyQuantification.jl +++ b/src/UncertaintyQuantification.jl @@ -1,6 +1,5 @@ module UncertaintyQuantification -using AbstractGPs using Bootstrap using CovarianceEstimation using DataFrames @@ -25,6 +24,7 @@ using Roots using StatsBase using Zygote +@reexport using AbstractGPs @reexport using Distributions import Base: rand, names, copy, run, length diff --git a/test/runtests.jl b/test/runtests.jl index 5d75c5724..a8fbb69f4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,3 @@ -using AbstractGPs using DataFrames using Distributed using HCubature From a7f44cef06004b12ac96c1cf2fbaafbe9593db79 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:44:14 +0200 Subject: [PATCH 102/117] Fix docstrings --- src/models/gp/standardization.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index c5f9d5002..ba49d2141 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -12,7 +12,7 @@ Internally, the `DataStandardizer` constructs the functions required for evaluat # Examples ```jldoctest julia> id = UncertaintyQuantification.IdentityTransform() -UncertaintyQuantification.IdentityTransform() +IdentityTransform() ``` """ struct IdentityTransform <: AbstractDataTransform end @@ -28,7 +28,7 @@ Internally, the `DataStandardizer` constructs the functions required for evaluat # Examples ```jldoctest julia> zscore = UncertaintyQuantification.ZScoreTransform() -UncertaintyQuantification.ZScoreTransform() +ZScoreTransform() ``` """ struct ZScoreTransform <: AbstractDataTransform end @@ -44,7 +44,7 @@ Internally, the `DataStandardizer` constructs the functions required for evaluat # Examples ```jldoctest julia> unitrange = UncertaintyQuantification.UnitRangeTransform() -UncertaintyQuantification.UnitRangeTransform() +UnitRangeTransform() ``` """ struct UnitRangeTransform <: AbstractDataTransform end From 70ef0fc64c2f9f92f87a9e0a0dba9045ff918676 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:44:33 +0200 Subject: [PATCH 103/117] Fix docstrings --- src/models/gp/gaussianprocess.jl | 16 +++++----------- src/models/gp/hyperparametertuning.jl | 2 ++ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 25b7203ba..40bfc4f87 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -1,5 +1,5 @@ struct GaussianProcess <: UQModel - gp::AbstractGPs.PosteriorGP + gp::PosteriorGP input::Union{Symbol, 
Vector{Symbol}} output::Symbol standardizer::DataStandardizer @@ -7,7 +7,7 @@ end """ GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, + gp::Union{GP, NoisyGP}, data::DataFrame, output::Symbol; input_transform::AbstractDataTransform = IdentityTransform(), @@ -32,8 +32,6 @@ Constructs a Gaussian process model for the given data and output variable. # Examples ```jldoctest -julia> using AbstractGPs - julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); @@ -42,7 +40,7 @@ julia> gp_model = GaussianProcess(gp, data, :y); ``` """ function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, + gp::Union{GP, NoisyGP}, data::DataFrame, output::Symbol; input_transform::AbstractDataTransform=IdentityTransform(), @@ -75,7 +73,7 @@ end """ GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, + gp::Union{GP, NoisyGP}, input::Union{UQInput, Vector{<:UQInput}}, model::Union{UQModel, Vector{<:UQModel}}, output::Symbol, @@ -104,8 +102,6 @@ Constructs a Gaussian process model for the given input and model. Evaluates the # Examples ```jldoctest -julia> using AbstractGPs - julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); julia> input = RandomVariable(Uniform(0, 5), :x); @@ -118,7 +114,7 @@ julia> gp_model = GaussianProcess(gp, input, model, :y, design); ``` """ function GaussianProcess( - gp::Union{AbstractGPs.GP, NoisyGP}, + gp::Union{GP, NoisyGP}, input::Union{UQInput, Vector{<:UQInput}}, model::Union{UQModel, Vector{<:UQModel}}, output::Symbol, @@ -179,8 +175,6 @@ Evaluates a fitted [`GaussianProcess`](@ref) model at the specified input locati # Examples ```jldoctest -julia> using AbstractGPs - julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); julia> data = DataFrame(x = 1:10, y = [1, 4, 10, 15, 24, 37, 50, 62, 80, 101]); diff --git a/src/models/gp/hyperparametertuning.jl b/src/models/gp/hyperparametertuning.jl index d9a039516..6089ecd2b 100644 --- a/src/models/gp/hyperparametertuning.jl +++ b/src/models/gp/hyperparametertuning.jl @@ -33,6 +33,8 @@ such as `LBFGS()`, `Adam()`, or `ConjugateGradient()`. 
# Examples ```jldoctest +julia> using Optim + julia> MaximumLikelihoodEstimation(Optim.Adam(alpha=0.01), Optim.Options(; iterations=1000, show_trace=false)) MaximumLikelihoodEstimation(Adam{Float64, Float64, Flat}(0.01, 0.9, 0.999, 1.0e-8, Flat()), Optim.Options(x_abstol = 0.0, x_reltol = 0.0, f_abstol = 0.0, f_reltol = 0.0, g_abstol = 1.0e-8, outer_x_abstol = 0.0, outer_x_reltol = 0.0, outer_f_abstol = 0.0, outer_f_reltol = 0.0, outer_g_abstol = 1.0e-8, f_calls_limit = 0, g_calls_limit = 0, h_calls_limit = 0, allow_f_increases = true, allow_outer_f_increases = true, successive_f_tol = 1, iterations = 1000, outer_iterations = 1000, store_trace = false, trace_simplex = false, show_trace = false, extended_trace = false, show_warnings = true, show_every = 1, time_limit = NaN, ) ) From 61322a3a35769b91909b17dd41e9aa901d3e855f Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:49:37 +0200 Subject: [PATCH 104/117] Fix documentation dependencies and loaded modules --- docs/Project.toml | 2 ++ docs/literate/metamodels/gaussianprocess.jl | 2 -- docs/src/manual/metamodels.md | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index 20c48de58..00fa50dfb 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -5,6 +5,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +Optim = "429524aa-4258-5aef-a3af-852621145aeb" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" UncertaintyQuantification = "7183a548-a887-11e9-15ce-a56ab60bad7a" @@ -12,3 +13,4 @@ UncertaintyQuantification = "7183a548-a887-11e9-15ce-a56ab60bad7a" Documenter = "1.14.1" DocumenterCitations = "1.4.1" DocumenterVitepress = "0.2.6" +Optim = "1.13.2" diff --git a/docs/literate/metamodels/gaussianprocess.jl b/docs/literate/metamodels/gaussianprocess.jl index 4afd16506..59a1059ae 100644 --- a/docs/literate/metamodels/gaussianprocess.jl +++ b/docs/literate/metamodels/gaussianprocess.jl @@ -35,8 +35,6 @@ After that, we construct a prior GP model. Here we assume a constant mean of 0.0 We also assume a small Gaussian noise term in the observations for numerical stability: ===# -using AbstractGPs - mean_f = ConstMean(0.0) kernel = SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]) σ² = 1e-5 diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index 064032fcf..e760c5598 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -24,8 +24,7 @@ This prior GP specifies that any finite collection of function values follows a To define a prior GP we use [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/) for the GP interface and mean function, and [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/) for the definition of a covariance kernel. 
Below, we construct a simple prior GP with a constant zero mean function and a scaled squared exponential kernel: ```@example gaussianprocess -using UncertaintyQuantification # hide -using AbstractGPs +using UncertaintyQuantification kernel = SqExponentialKernel() ∘ ScaleTransform(3.0) gp = GP(0.0, kernel) From a02af1d3c1497b178b2921c0c5252daf6d6277f3 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:56:08 +0200 Subject: [PATCH 105/117] Fix type of posterior gp in GaussianProcess --- src/models/gp/gaussianprocess.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 40bfc4f87..79e65ac79 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -1,5 +1,5 @@ struct GaussianProcess <: UQModel - gp::PosteriorGP + gp::AbstractGPs.PosteriorGP input::Union{Symbol, Vector{Symbol}} output::Symbol standardizer::DataStandardizer From 13179d180be8d284dad6a111d57cfc77f7c989a8 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 16:56:25 +0200 Subject: [PATCH 106/117] Add literate demo file --- demo/metamodels/gaussianprocess.jl | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 demo/metamodels/gaussianprocess.jl diff --git a/demo/metamodels/gaussianprocess.jl b/demo/metamodels/gaussianprocess.jl new file mode 100644 index 000000000..181fbf374 --- /dev/null +++ b/demo/metamodels/gaussianprocess.jl @@ -0,0 +1,43 @@ +using UncertaintyQuantification + +x = RandomVariable.(Uniform(-5, 5), [:x1, :x2]) + +himmelblau = Model( + df -> (df.x1 .^ 2 .+ df.x2 .- 11) .^ 2 .+ (df.x1 .+ df.x2 .^ 2 .- 7) .^ 2, :y +) + +design = LatinHypercubeSampling(80) + +mean_f = ConstMean(0.0) +kernel = SqExponentialKernel() ∘ ARDTransform([1.0, 1.0]) +σ² = 1e-5 + +gp_prior = with_gaussian_noise(GP(mean_f, kernel), σ²) + +using Optim + +optimizer = MaximumLikelihoodEstimation(Optim.Adam(alpha=0.005), Optim.Options(; iterations=10, show_trace=false)) + +input_transform = ZScoreTransform() + +gp_model = GaussianProcess( + gp_prior, + x, + himmelblau, + :y, + design; + input_transform=input_transform, + optimization=optimizer +) + +test_data = sample(x, 1000) +evaluate!(gp_model, test_data; mode=:mean_and_var) + +test_data = sample(x, 1000) +evaluate!(gp_model, test_data) +evaluate!(himmelblau, test_data) + +mse = mean((test_data.y .- test_data.y_mean) .^ 2) +println("MSE is: $mse") + +# This file was generated using Literate.jl, https://github.com/fredrikekre/Literate.jl From 11667f23b6962a7bfeb701751102979956a0cbd5 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 17:08:09 +0200 Subject: [PATCH 107/117] Fix bug where single UQInput is not filtered for random inputs --- src/models/gp/gaussianprocess.jl | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index 79e65ac79..b172c9c14 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -115,7 +115,7 @@ julia> gp_model = GaussianProcess(gp, input, model, :y, design); """ function GaussianProcess( gp::Union{GP, NoisyGP}, - input::Union{UQInput, Vector{<:UQInput}}, + input::Vector{<:UQInput}, model::Union{UQModel, Vector{<:UQModel}}, output::Symbol, experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}; @@ -154,6 +154,28 @@ function GaussianProcess( ) end +function GaussianProcess( + gp::Union{GP, NoisyGP}, + input::UQInput, + 
model::Union{UQModel, Vector{<:UQModel}}, + output::Symbol, + experimentaldesign::Union{AbstractMonteCarlo, AbstractDesignOfExperiments}; + input_transform::AbstractDataTransform=IdentityTransform(), + output_transform::AbstractDataTransform=IdentityTransform(), + optimization::AbstractHyperparameterOptimization=NoHyperparameterOptimization() +) + return GaussianProcess( + gp, + [input], + model, + output, + experimentaldesign; + input_transform=input_transform, + output_transform=output_transform, + optimization=optimization + ) +end + """ evaluate!(gp::GaussianProcess, data::DataFrame; mode::Symbol = :mean, n_samples::Int = 1) From 4e4c19d4b8e84cb4871b1763fd01d772abdba8ca Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Tue, 14 Oct 2025 17:08:40 +0200 Subject: [PATCH 108/117] Fix docstring does not require using AbstractGP --- src/models/gp/parameterization.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index cbbb73a11..77b300ec5 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -185,8 +185,6 @@ matrix when evaluating the finite-dimensional projection of `gp`. # Examples ```jldoctest -julia> using AbstractGPs - julia> gp = GP(SqExponentialKernel()); julia> noisy_gp = with_gaussian_noise(gp, 0.1); From 8fbc8a8be60aa102b6d3dc92f8162e68cc785d16 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 16 Oct 2025 18:16:21 +0200 Subject: [PATCH 109/117] Add gaussian processes api --- docs/make.jl | 1 + docs/src/api/gaussianprocesses.md | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 docs/src/api/gaussianprocesses.md diff --git a/docs/make.jl b/docs/make.jl index 2aaf23318..cd691ffcc 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -59,6 +59,7 @@ makedocs(; "Reliability" => "api/reliability.md", "ResponseSurface" => "api/responsesurface.md", "PolyharmonicSpline" => "api/polyharmonicspline.md", + "Gaussian Processes" => "api/gaussianprocesses.md", "Simulations" => "api/simulations.md", "Bayesian Updating" => "api/bayesianupdating.md", "Power Spectral Density Functions" => "api/psd.md", diff --git a/docs/src/api/gaussianprocesses.md b/docs/src/api/gaussianprocesses.md new file mode 100644 index 000000000..2d649fe6d --- /dev/null +++ b/docs/src/api/gaussianprocesses.md @@ -0,0 +1,27 @@ +# Gaussian Process Regression + +Methods for Gaussian process regression. 
+ +## Index + +```@index +Pages = ["gaussianprocesses.md"] +``` + +## Types + +```@docs +GaussianProcess +NoHyperparameterOptimization +MaximumLikelihoodEstimation +IdentityTransform +ZScoreTransform +UnitRangeTransform +StandardNormalTransform +``` + +## Functions + +```@docs +evaluate!(gp::GaussianProcess, data::DataFrame; mode::Symbol = :mean, n_samples::Int = 1) +``` From 9982b849f88e6c63742b7246dfd1f00166550ad4 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 16 Oct 2025 18:16:50 +0200 Subject: [PATCH 110/117] Fix typo in gaussian process literate demo --- docs/literate/metamodels/gaussianprocess.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/literate/metamodels/gaussianprocess.jl b/docs/literate/metamodels/gaussianprocess.jl index 59a1059ae..7fc4584ba 100644 --- a/docs/literate/metamodels/gaussianprocess.jl +++ b/docs/literate/metamodels/gaussianprocess.jl @@ -98,12 +98,12 @@ The mean prediction of our model in this case has an mse of about 65 and looks l #md A = repeat(collect(a)', length(b), 1) #hide #md B = repeat(collect(b), 1, length(a)) #hide #md df = DataFrame(x1 = vec(A), x2 = vec(B)) #hide -#md evaluate!(gpr, df; mode=:mean_and_var) #hide +#md evaluate!(gp_model, df; mode=:mean_and_var) #hide #md evaluate!(himmelblau, df) #hide #md gp_mean = reshape(df[:, :y_mean], length(b), length(a)) #hide #md gp_var = reshape(df[:, :y_var], length(b), length(a)) #hide -#md himmelblau_f = reshape(df[:, :y], length(b), length(a)) #hide -#md s1 = surface(a, b, himmelblau_f; plot_title="Himmelblau's function") +#md himmelblau_values = reshape(df[:, :y], length(b), length(a)) #hide +#md s1 = surface(a, b, himmelblau_values; plot_title="Himmelblau's function") #md s2 = surface(a, b, gp_mean; plot_title="GP posterior mean") #md plot(s1, s2, layout = (1, 2), legend = false) #md savefig("gp-mean-comparison.svg") # hide From 697cb54cb7e5fbb992b39137e8650cf411e892e6 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Thu, 16 Oct 2025 18:17:11 +0200 Subject: [PATCH 111/117] Fix jldoctest error --- src/models/gp/gaussianprocess.jl | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index b172c9c14..d23972535 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -102,15 +102,14 @@ Constructs a Gaussian process model for the given input and model. 
Evaluates the # Examples ```jldoctest -julia> gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); - -julia> input = RandomVariable(Uniform(0, 5), :x); - -julia> model = Model(df -> sin.(df.x), :y); - -julia> design = LatinHypercubeSampling(10); - -julia> gp_model = GaussianProcess(gp, input, model, :y, design); +julia> begin # hide + gp = with_gaussian_noise(GP(0.0, SqExponentialKernel()), 1e-3); + x = RandomVariable(Uniform(0, 5), :x); + model = Model(df -> sin.(df.x), :y); + design = LatinHypercubeSampling(10); + gp_model = GaussianProcess(gp, x, model, :y, design); + nothing # hide + end # hide ``` """ function GaussianProcess( From d4b796a0e25c681e4bed5dbcb51be98256a531e7 Mon Sep 17 00:00:00 2001 From: felixmett Date: Fri, 17 Oct 2025 15:16:37 +0200 Subject: [PATCH 112/117] Change github username of Felix --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 39084f03d..7a2e5bbf9 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -24,7 +24,7 @@ authors: link: https://github.com/mlsuh - name: Felix Mett platform: github - link: https://github.com/Cr0gan + link: https://github.com/felixmett - name: Andrea Perin platform: github link: https://github.com/andreaperin From aae5b9d0dd93fe12b050d84669d963780a38b97b Mon Sep 17 00:00:00 2001 From: felixmett Date: Fri, 17 Oct 2025 15:16:55 +0200 Subject: [PATCH 113/117] Add missing docs --- docs/src/api/gaussianprocesses.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/api/gaussianprocesses.md b/docs/src/api/gaussianprocesses.md index 2d649fe6d..fcb298f04 100644 --- a/docs/src/api/gaussianprocesses.md +++ b/docs/src/api/gaussianprocesses.md @@ -24,4 +24,5 @@ StandardNormalTransform ```@docs evaluate!(gp::GaussianProcess, data::DataFrame; mode::Symbol = :mean, n_samples::Int = 1) +with_gaussian_noise(gp::AbstractGPs.GP, σ²::Real) ``` From 1962e22e951f9e4ebe29f1cde3d6a7366a62b91e Mon Sep 17 00:00:00 2001 From: felixmett Date: Fri, 17 Oct 2025 15:17:27 +0200 Subject: [PATCH 114/117] Refactor example code blocks --- docs/src/manual/metamodels.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index e760c5598..042089754 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -28,7 +28,7 @@ using UncertaintyQuantification kernel = SqExponentialKernel() ∘ ScaleTransform(3.0) gp = GP(0.0, kernel) -return nothing # hide +nothing # hide ``` #### Posterior Gaussian Process @@ -87,7 +87,7 @@ df = DataFrame(x = x, y = y) σ² = 1e-5 gp = with_gaussian_noise(gp, σ²) gp_model = GaussianProcess(gp, df, :y) -return nothing # hide +nothing # hide ``` Now we can use our GP model to predict at new input locations `x_test`: @@ -112,7 +112,7 @@ y_true = sin.(x_test) + 0.3 * cos.(2 .* x_test) # hide plot!(x_test, y_true, color=:red, label="True function") # hide savefig(p, "posterior-gp.svg"); # hide -return nothing # hide +nothing # hide ``` ![](posterior-gp.svg) @@ -134,7 +134,7 @@ For numerical reasons, the logarithm of the marginal likelihood is typically use To optimize the hyperparameters of our GP model before computing the posterior GP, we can pass a gradient-based optimizer provided by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/) to the `GaussianProcess` constructor: ```@example gaussianprocess -posterior_gp = GaussianProcess(gp, df, :y; optimization=MaximumLikelihoodEstimation()) +gp_model = GaussianProcess(gp, df, 
:y; optimization=MaximumLikelihoodEstimation()) prediction = DataFrame(:x => x_test) evaluate!(gp_model, prediction; mode=:mean_and_var) @@ -149,10 +149,10 @@ plot!( ) # hide plot!(x_test, y_true, color=:red, label="True function") # hide -savefig(p, "posterior-gp-optimized.svg"); # hide -return nothing # hide +savefig(p, "posterior-gp-opt.svg"); # hide +nothing # hide ``` -![](posterior-gp-optimized.svg) +![](posterior-gp-opt.svg) Internally, `MaximumLikelihoodEstimation()` defaults to using [`LBFGS`](https://julianlsolvers.github.io/Optim.jl/stable/algo/lbfgs/) optimizer that performs 10 optimization steps with standard optimization hyperparameters as defined [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/). Note that any other first-order optimizer supported by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/), along with its corresponding hyperparameters, can also be used when constructing [`MaximumLikelihoodEstimation`](@ref). From 8450250ccaf4ceb5ce47c85b106ad67073a67c35 Mon Sep 17 00:00:00 2001 From: felixmett Date: Fri, 17 Oct 2025 15:17:46 +0200 Subject: [PATCH 115/117] Fix unresolved reference in docstring --- src/models/gp/gaussianprocess.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/models/gp/gaussianprocess.jl b/src/models/gp/gaussianprocess.jl index d23972535..a312c60f7 100644 --- a/src/models/gp/gaussianprocess.jl +++ b/src/models/gp/gaussianprocess.jl @@ -87,7 +87,7 @@ Constructs a Gaussian process model for the given input and model. Evaluates the # Arguments - `gp`: A Gaussian process object, typically from `AbstractGPs`, defining the kernel and mean. -- `input`: Single input or vector of inputs. The Gaussian process will only consider inputs of type ['RandomVariable](@ref) as input features. +- `input`: Single input or vector of inputs. The Gaussian process will only consider inputs of type [`RandomVariable`](@ref) as input features. - `model`: Single model or vector of models of supertype [`UQModel`](@ref) that the Gaussian process is supposed to model. - `output`: The name of the output (as a `Symbol`) to be modeled as the response variable. - `experimentaldesign`: The strategy utilized for sampling the input variables. From 703425d28a29eb8298d6b416cd552db2b3f4836e Mon Sep 17 00:00:00 2001 From: felixmett Date: Fri, 17 Oct 2025 15:18:14 +0200 Subject: [PATCH 116/117] Refactor internal docs to comments --- src/models/gp/parameterization.jl | 14 ++-- src/models/gp/standardization.jl | 125 +++++++++++++++--------------- 2 files changed, 70 insertions(+), 69 deletions(-) diff --git a/src/models/gp/parameterization.jl b/src/models/gp/parameterization.jl index 77b300ec5..217eb001d 100644 --- a/src/models/gp/parameterization.jl +++ b/src/models/gp/parameterization.jl @@ -163,11 +163,13 @@ apply_parameters(f::GP, θ) = GP( apply_parameters(f.kernel, θ[2]) ) -""" - NoisyGP(gp::GP, σ²::Real) - -Wraps a Gaussian process `gp` and adds learnable Gaussian observation noise with zero mean and variance `σ²` to the diagonal of its finite-dimensional covariance matrix. -""" +# --- +# NoisyGP(gp::GP, σ²::Real) +# +# Wraps a Gaussian process `gp` and adds learnable Gaussian observation noise +# with zero mean and variance `σ²` to the diagonal of its finite-dimensional +# covariance matrix. +# --- struct NoisyGP{T<:GP,Tn<:Real} gp::T σ²::Tn @@ -180,7 +182,7 @@ end Wraps a Gaussian process `gp` with additive Gaussian observation noise of variance `σ²`. 
-This creates a [`NoisyGP`](@ref) object, which adds `σ²` to the diagonal of the covariance +This creates a Gaussian process object, which adds `σ²` to the diagonal of the covariance matrix when evaluating the finite-dimensional projection of `gp`. # Examples diff --git a/src/models/gp/standardization.jl b/src/models/gp/standardization.jl index ba49d2141..af594fbb1 100644 --- a/src/models/gp/standardization.jl +++ b/src/models/gp/standardization.jl @@ -54,7 +54,7 @@ struct UnitRangeTransform <: AbstractDataTransform end A normalization transform that transforms data to the standard normal space. -Can only be used as an input transformation in a [`GaussianProcess`](@ref) for inputs of type [`UQInput`](@ref). +Can only be used as an input transformation in a [`GaussianProcess`](@ref) for inputs of type [`RandomVariable`](@ref). Internally, the `DataStandardizer` constructs the function required for evaluation. # Examples @@ -73,63 +73,60 @@ struct OutputTransform{T <: AbstractDataTransform} end OutputTransform(::Type{T}) where {T <: AbstractDataTransform} = OutputTransform{T}() OutputTransform(x::AbstractDataTransform) = OutputTransform(typeof(x)) -# ---------------- Struct for bundled transform functions ---------------- -""" -# Developer Note - - DataStandardizer(fᵢ::Function, fₒ::Function, fₒ⁻¹::Function, var_fₒ⁻¹::Function) - -Bundles input and output transformation functions for Gaussian process models. - -# Fields - -- `fᵢ` - function applied to input data. -- `fₒ` - function applied to output data. -- `fₒ⁻¹` - inverse function for the output transformation. -- `var_fₒ⁻¹` - function for transforming output variances. - -!!! note "Inverse output transformations" - -Gaussian process regression requires two distinct inverse transformations for the output: -one for the mean predictions (this same transformation can also be applied to function samples) and one for the variance predictions. - -Consider a z-score transformation of output ``y``: - ```math - \tilde{y} = \frac{y - μ}{σ}. - ``` -To recover the mean of the untransformed output, we can simply apply the inverse transformation: - ```math - E[y] = E[σ\tilde{y} + μ] = σE[\tilde{y}] + μ. - ``` -Analogously, sampled functions ``\tilde{y}_s`` from the Gaussian process regression model can be transformed back: - ```math - y_s = σ\tilde{y}_s + μ. - ``` -The variance, however, is untransformed as follows: - ```math - Var[y] = E[(σ\tilde{y} + μ - E[σ\tilde{y} + μ])^2] = E[(σ^2(\tilde{y} - E[\tilde{y}])^2] = σ^2 Var[\tilde{y}] - ``` -Hence, `fₒ⁻¹` and `var_fₒ⁻¹` must be implemented separately. - -# Constructor - - DataStandardizer( - data::DataFrame, - input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, - output::Symbol, - input_transform::InputTransform, - output_transform::OutputTransform - ) - -Constructs a set of transformation functions from the provided data and user-specified input/output transforms. -Internally, it uses `build_datatransform` to create the actual functions. - -# Purpose - -This struct allows [`GaussianProcess`](@ref) models to consistently apply input and output transformations - (like `ZScoreTransform` or `IdentityTransform`) while keeping the API simple for end-users. -The `AbstractDataTransform` structs signal the desired behavior, and `DataStandardizer` converts them into callable functions for internal use. -""" +# --- +# # Developer Note + +# DataStandardizer bundles input and output transformation functions for Gaussian process models. + +# # Fields + +# - `fᵢ` - function applied to input data. 
+# - `fₒ` - function applied to output data. +# - `fₒ⁻¹` - inverse function for the output transformation. +# - `var_fₒ⁻¹` - function for transforming output variances. + +# !!! note "Inverse output transformations" + +# Gaussian process regression requires two distinct inverse transformations for the output: +# one for the mean predictions (this same transformation can also be applied to function samples) and one for the variance predictions. + +# Consider a z-score transformation of output ``y``: +# ```math +# \tilde{y} = \frac{y - μ}{σ}. +# ``` +# To recover the mean of the untransformed output, we can simply apply the inverse transformation: +# ```math +# E[y] = E[σ\tilde{y} + μ] = σE[\tilde{y}] + μ. +# ``` +# Analogously, sampled functions ``\tilde{y}_s`` from the Gaussian process regression model can be transformed back: +# ```math +# y_s = σ\tilde{y}_s + μ. +# ``` +# The variance, however, is untransformed as follows: +# ```math +# Var[y] = E[(σ\tilde{y} + μ - E[σ\tilde{y} + μ])^2] = E[(σ^2(\tilde{y} - E[\tilde{y}])^2] = σ^2 Var[\tilde{y}] +# ``` +# Hence, `fₒ⁻¹` and `var_fₒ⁻¹` must be implemented separately. + +# # Constructor + +# DataStandardizer( +# data::DataFrame, +# input::Union{Symbol, Vector{<:Symbol}, UQInput, Vector{<:UQInput}}, +# output::Symbol, +# input_transform::InputTransform, +# output_transform::OutputTransform +# ) + +# Constructs a set of transformation functions from the provided data and user-specified input/output transforms. +# Internally, it uses `build_datatransform` to create the actual functions. + +# # Purpose + +# This struct allows [`GaussianProcess`](@ref) models to consistently apply input and output transformations +# (like `ZScoreTransform` or `IdentityTransform`) while keeping the API simple for end-users. +# The `AbstractDataTransform` structs signal the desired behavior, and `DataStandardizer` converts them into callable functions for internal use. +# --- struct DataStandardizer fᵢ::Function fₒ::Function @@ -149,12 +146,14 @@ function DataStandardizer( return DataStandardizer(fᵢ, fₒ, fₒ⁻¹, var_fₒ⁻¹) end -# ---------------- Transform builders ---------------- -""" - build_datatransform(data, input/output, transform) -Returns a function (or pair of functions for outputs) that applies the specified transformation to a dataframe. -""" +# --- +# build_datatransform(data, input/output, transform) +# +# Returns a function (or pair of functions for outputs) that applies the specified +# transformation to a dataframe. +# --- + # ---------------- Input ---------------- # No input transformation function build_datatransform( From b9a8dddfa731515d01aec04a64f0b95fca5cbae3 Mon Sep 17 00:00:00 2001 From: Cr0gan Date: Wed, 22 Oct 2025 16:51:17 +0200 Subject: [PATCH 117/117] Fix faulty indent and linting errors --- docs/src/manual/metamodels.md | 46 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/docs/src/manual/metamodels.md b/docs/src/manual/metamodels.md index 042089754..df2c2dad7 100644 --- a/docs/src/manual/metamodels.md +++ b/docs/src/manual/metamodels.md @@ -13,13 +13,14 @@ A Response Surface is a simple polynomial surrogate model. It can be trained by ## Gaussian Process Regression ### Theoretical Background + A Gaussian Process (GP) is a collection of random variables, any finite subset of which has a joint Gaussian distribution. It is fully specified by a mean function $m(x)$ and a covariance (kernel) function $k(x, x')$. In GP regression, we aim to model an unknown function $f(x)$. 
Before observing any data, we assume that the function $f(x)$ is distributed according to a GP: ```math f(x) \sim \mathcal{G}\mathcal{P}\left( m(x), k(x, x') \right). ``` -This prior GP specifies that any finite collection of function values follows a multivariate normal distribution. +This prior GP specifies that any finite collection of function values follows a multivariate normal distribution. To define a prior GP we use [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/) for the GP interface and mean function, and [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/) for the definition of a covariance kernel. Below, we construct a simple prior GP with a constant zero mean function and a scaled squared exponential kernel: @@ -27,12 +28,12 @@ To define a prior GP we use [`AbstractGPs.jl`](https://juliagaussianprocesses.gi using UncertaintyQuantification kernel = SqExponentialKernel() ∘ ScaleTransform(3.0) -gp = GP(0.0, kernel) -nothing # hide +gp = GP(0.0, kernel); nothing # hide ``` #### Posterior Gaussian Process -The posterior GP represents the distribution of functions after incorporating observed data. We denote the observation data as: + +The posterior GP represents the distribution of functions after incorporating observed data. We denote the observation data as: ```math \mathcal{D} = \lbrace (\hat{x}_i, \hat{f}_i) \mid i=1, \dots, N \rbrace, @@ -42,7 +43,7 @@ where $\hat{f}_i = f(\hat{x}_i)$ in the noise-free observation case, and $\hat{f ```math \mu(\hat{X}) = [m(\hat{x}_1), \dots, m(\hat{x}_N)], \quad K(\hat{X}, \hat{X}) \text{ with entries } K_{ij} = k(\hat{x}_i, \hat{x}_j). - ``` +``` For a new input location $x^*$ we are interested at the unknown function value $f^* = f(x^*)$. By the definition of a GP, the joint distribution of observed outputs $\hat{f}_i$ and the unknown $f^*$ is multivariate Gaussian: @@ -51,6 +52,7 @@ For a new input location $x^*$ we are interested at the unknown function value $ ``` where: + - $K(\hat{X}, \hat{X})$ is the covariance matrix with entries $K_{ij} = k(\hat{x}_i, \hat{x}_j)$, - $K(\hat{X}, x^*)$ is the covariance matrix with entries $K_{i1} = k(\hat{x}_i, x^*)$, - and $K(x^*, x^*)$ is the variance at the unknown input location. 
@@ -61,7 +63,7 @@ We can then obtain the posterior distribution of $f^*$ from the properties of mu f^* \mid \hat{X}, \hat{f}, x^* \sim \mathcal{N}(\mu^*(x^*), \Sigma^*(x^*)), ``` -with +with ```math \mu^*(x^*) = m(x^*) + K(x^*, \hat{X})K(\hat{X}, \hat{X})^{-1}(\hat{f} - \mu(\hat{X})), \\ @@ -84,10 +86,9 @@ x = collect(range(0, 10, 10)) y = sin.(x) + 0.3 * cos.(2 .* x) df = DataFrame(x = x, y = y) -σ² = 1e-5 +σ² = 1e-5 gp = with_gaussian_noise(gp, σ²) -gp_model = GaussianProcess(gp, df, :y) -nothing # hide +gp_model = GaussianProcess(gp, df, :y); nothing # hide ``` Now we can use our GP model to predict at new input locations `x_test`: @@ -104,20 +105,21 @@ prediction_std = sqrt.(prediction[!, :y_var]) # hide p = plot(x_test, prediction_mean, color=:blue, label="Mean prediction") # hide plot!( - x_test, prediction_mean, ribbon=2 .* prediction_std, + x_test, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band" ) # hide y_true = sin.(x_test) + 0.3 * cos.(2 .* x_test) # hide plot!(x_test, y_true, color=:red, label="True function") # hide -savefig(p, "posterior-gp.svg"); # hide -nothing # hide +savefig(p, "posterior-gp.svg"); nothing # hide ``` -![](posterior-gp.svg) + +![Fitted Gaussian process](posterior-gp.svg) #### Hyperparameter optimization -GP models typically contain hyperparameters in their mean functions $m(x; \theta_m)$ and covariance kernel functions $k(x, x'; \theta_k)$. The observation noise variance $\sigma^2_{e}$ is also considered a hyperparameter related to the kernel. The choice of hyperparameters strongly affects the quality of the posterior GP. + +GP models typically contain hyperparameters in their mean functions $m(x; \theta_m)$ and covariance kernel functions $k(x, x'; \theta_k)$. The observation noise variance $\sigma^2_{e}$ is also considered a hyperparameter related to the kernel. The choice of hyperparameters strongly affects the quality of the posterior GP. A common approach to selecting hyperparameters is maximum likelihood estimation (MLE) (see, e.g. [rasmussen2005gaussian](@cite)), where we maximize the likelihood of observing the training data $\mathcal{D}$ under the chosen GP prior. @@ -127,7 +129,7 @@ The marginal likelihood of the observed training outputs $\hat{f}$ is: p(\hat{f} \mid \hat{X}, \theta_m, \theta_k, \sigma^2_{e}) = \mathcal{N}(\hat{f} \mid \mu_{\theta_m}(\hat{X}), K_{\theta_k}(\hat{X}, \hat{X}) + \sigma^2_{e}I), ``` -where $\mu_{\theta_m}(\hat{X})$ and $K_{\theta_k}(\hat{X}, \hat{X})$ denote the parameter dependent versions of the previously defined quantities. +where $\mu_{\theta_m}(\hat{X})$ and $K_{\theta_k}(\hat{X}, \hat{X})$ denote the parameter dependent versions of the previously defined quantities. For numerical reasons, the logarithm of the marginal likelihood is typically used. Maximizing the log marginal likelihood with respect to the hyperparameters then yields the parameters that best explain the observed data. After obtaining the optimal hyperparamters, the posterior GP can be constructed as described above. 
@@ -144,28 +146,30 @@ prediction_std = sqrt.(prediction[!, :y_var]) # hide p = plot(x_test, prediction_mean, color=:blue, label="Mean prediction") # hide plot!( - x_test, prediction_mean, ribbon=2 .* prediction_std, + x_test, prediction_mean, ribbon=2 .* prediction_std, color=:grey, alpha=0.5, label="Confidence band" ) # hide plot!(x_test, y_true, color=:red, label="True function") # hide -savefig(p, "posterior-gp-opt.svg"); # hide -nothing # hide +savefig(p, "posterior-gp-opt.svg"); nothing # hide ``` -![](posterior-gp-opt.svg) + +![Optimized Gaussian process](posterior-gp-opt.svg) Internally, `MaximumLikelihoodEstimation()` defaults to using [`LBFGS`](https://julianlsolvers.github.io/Optim.jl/stable/algo/lbfgs/) optimizer that performs 10 optimization steps with standard optimization hyperparameters as defined [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/). Note that any other first-order optimizer supported by [`Optim.jl`](https://julianlsolvers.github.io/Optim.jl/stable/), along with its corresponding hyperparameters, can also be used when constructing [`MaximumLikelihoodEstimation`](@ref). -During optimization, GP hyperparameters $\theta_m, \theta_k$ and $\sigma^2_{e}$ are automatically extracted and updated. +During optimization, GP hyperparameters $\theta_m, \theta_k$ and $\sigma^2_{e}$ are automatically extracted and updated. We support the automatic extraction of hyperparameters from mean functions provided by [`AbstractGPs.jl`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#Mean-functions), with the exception of: + - Custom mean functions [`CustomMean`](https://juliagaussianprocesses.github.io/AbstractGPs.jl/stable/api/#AbstractGPs.CustomMean). These are defined with a custom function that itself could depend on hyperparameters. These additional hyperparameters are ignored in the optimization. Kernel functions are defined with the kernels and transformations provided by [`KernelFunctions.jl`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/). For similar reasons as with `CustomMean`, we do not extract potential function hyperparameters from the following kernels or transforms: + - Transforms defined with custom functions [`FunctionTransform`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/transform/#KernelFunctions.FunctionTransform), - The [`GibbsKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#KernelFunctions.GibbsKernel), which models a kernel lengthscale parameter with the help of a function. Further, GP models containing the following kernels are not supported for hyperparameter optimization currently: + - Multi-output kernels [`MOKernel`](https://juliagaussianprocesses.github.io/KernelFunctions.jl/stable/kernels/#Multi-output-Kernels), - Neural kernel networks [`NeuralKernelNetwork`]. -