Merge pull request #47 from TuringLang/kx/improve-interface

yebai · web-flow · commit 64a72da3f05c · 2019-04-03T10:09:01.000+01:00
Improve NUTS interface
diff --git a/src/adaptation.jl b/src/adaptation.jl
@@ -19,9 +19,9 @@ function update(h::Hamiltonian, prop::AbstractProposal, dpc::Adaptation.Abstract
 end
 
 function update(h::Hamiltonian, prop::AbstractProposal, da::NesterovDualAveraging)
-    return h, prop(getϵ(da))
+    return h, prop(prop.integrator(getϵ(da)))
 end
 
 function update(h::Hamiltonian, prop::AbstractProposal, ca::Adaptation.AbstractCompositeAdaptor)
-    return h(getM⁻¹(ca.pc)), prop(getϵ(ca.ssa))
+    return h(getM⁻¹(ca.pc)), prop(prop.integrator(getϵ(ca.ssa)))
 end
diff --git a/src/adaptation/stepsize.jl b/src/adaptation/stepsize.jl
@@ -105,6 +105,7 @@ function adapt_stepsize!(da::NesterovDualAveraging, α::AbstractFloat)
     ϵ = exp(x)
     DEBUG && @debug "Adapting step size..." "new ϵ = $ϵ" "old ϵ = $(da.state.ϵ)"
 
+    # TODO: we might want to remove this when all other numerical issues are correctly handled
     if isnan(ϵ) || isinf(ϵ)
         @warn "Incorrect ϵ = $ϵ; ϵ_previous = $(da.state.ϵ) is used instead."
         ϵ = da.state.ϵ
diff --git a/src/hamiltonian.jl b/src/hamiltonian.jl
@@ -16,7 +16,17 @@ end
 ∂H∂r(h::Hamiltonian{<:DenseEuclideanMetric}, r::AbstractVector) = h.metric.M⁻¹ * r
 
 function hamiltonian_energy(h::Hamiltonian, θ::AbstractVector, r::AbstractVector)
-    return kinetic_energy(h, r, θ) + potential_energy(h, θ)
+    K = kinetic_energy(h, r, θ)
+    if isnan(K)
+        K = Inf
+        @warn "Kinetic energy is `NaN` and is set to `Inf`."
+    end
+    V = potential_energy(h, θ)
+    if isnan(V)
+        V = Inf
+        @warn "Potential energy is `NaN` and is set to `Inf`."
+    end
+    return K + V
 end
 
 potential_energy(h::Hamiltonian, θ::AbstractVector) = -h.logπ(θ)
diff --git a/src/proposal.jl b/src/proposal.jl
@@ -7,12 +7,12 @@ struct TakeLastProposal{I<:AbstractIntegrator} <: StaticTrajectory{I}
     n_steps     ::  Int
 end
 
-# Create a `TakeLastProposal` with a new `ϵ`
-function (tlp::TakeLastProposal)(ϵ::AbstractFloat)
-    return TakeLastProposal(tlp.integrator(ϵ), tlp.n_steps)
+# Create a `TakeLastProposal` with a new integrator
+function (tlp::TakeLastProposal)(integrator::AbstractIntegrator)
+    return TakeLastProposal(integrator, tlp.n_steps)
 end
 
-function propose(prop::TakeLastProposal, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}) where {T<:Real}
+function transition(prop::TakeLastProposal, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}) where {T<:Real}
     θ, r, _ = steps(prop.integrator, h, θ, r, prop.n_steps)
     return θ, -r
 end
@@ -21,21 +21,22 @@ abstract type DynamicTrajectory{I<:AbstractIntegrator} <: AbstractHamiltonianTra
 abstract type NoUTurnTrajectory{I<:AbstractIntegrator} <: DynamicTrajectory{I} end
 struct NUTS{I<:AbstractIntegrator} <: NoUTurnTrajectory{I}
     integrator  ::  I
+    max_depth   ::  Int
+    Δ_max       ::  AbstractFloat
 end
 
-# Create a `NUTS` with a new `ϵ`
-function (snuts::NUTS)(ϵ::AbstractFloat)
-    return NUTS(snuts.integrator(ϵ))
+# Helper function to use default values
+NUTS(integrator::AbstractIntegrator) = NUTS(integrator, 10, 1000.0)
+
+# Create a `NUTS` with a new integrator
+function (snuts::NUTS)(integrator::AbstractIntegrator)
+    return NUTS(integrator, snuts.max_depth, snuts.Δ_max)
 end
 
 struct MultinomialNUTS{I<:AbstractIntegrator} <: NoUTurnTrajectory{I}
     integrator  ::  I
 end
 
-function NUTS(h::Hamiltonian, θ::AbstractVector{T}) where {T<:Real}
-    return NUTS(Leapfrog(find_good_eps(h, θ)))
-end
-
 function find_good_eps(rng::AbstractRNG, h::Hamiltonian, θ::AbstractVector{T}; max_n_iters::Int=100) where {T<:Real}
     ϵ′ = ϵ = 0.1
     a_min, a_cross, a_max = 0.25, 0.5, 0.75 # minimal, crossing, maximal accept ratio
@@ -93,14 +94,14 @@ end
 find_good_eps(h::Hamiltonian, θ::AbstractVector{T}; max_n_iters::Int=100) where {T<:Real} = find_good_eps(GLOBAL_RNG, h, θ; max_n_iters=max_n_iters)
 
 # TODO: implement a more efficient way to build the balance tree
-function build_tree(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}, logu::AbstractFloat, v::Int, j::Int, H::AbstractFloat;
-                    Δ_max::AbstractFloat=1000.0) where {I<:AbstractIntegrator,T<:Real}
+function build_tree(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T},
+                    logu::AbstractFloat, v::Int, j::Int, H::AbstractFloat) where {I<:AbstractIntegrator,T<:Real}
     if j == 0
         # Base case - take one leapfrog step in the direction v.
         θ′, r′, _is_valid = step(nt.integrator, h, θ, r)
         H′ = _is_valid ? hamiltonian_energy(h, θ′, r′) : Inf
         n′ = (logu <= -H′) ? 1 : 0
-        s′ = (logu < Δ_max + -H′) ? 1 : 0
+        s′ = (logu < nt.Δ_max + -H′) ? 1 : 0
         α′ = exp(min(0, H - H′))
 
         return θ′, r′, θ′, r′, θ′, r′, n′, s′, α′, 1
@@ -128,18 +129,17 @@ function build_tree(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian,
     end
 end
 
-build_tree(nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}, logu::AbstractFloat, v::Int, j::Int, H::AbstractFloat;
-           Δ_max::AbstractFloat=1000.0) where {I<:AbstractIntegrator,T<:Real} = build_tree(GLOBAL_RNG, nt, h, θ, r, logu, v, j, H; Δ_max=Δ_max)
+build_tree(nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T},
+           logu::AbstractFloat, v::Int, j::Int, H::AbstractFloat) where {I<:AbstractIntegrator,T<:Real} = build_tree(GLOBAL_RNG, nt, h, θ, r, logu, v, j, H)
 
-function propose(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T};
-                 j_max::Int=10) where {I<:AbstractIntegrator,T<:Real}
+function transition(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}) where {I<:AbstractIntegrator,T<:Real}
     H = hamiltonian_energy(h, θ, r)
     logu = log(rand(rng)) - H
 
     θm = θ; θp = θ; rm = r; rp = r; j = 0; θ_new = θ; r_new = r; n = 1; s = 1
 
     local α, nα
-    while s == 1 && j <= j_max
+    while s == 1 && j <= nt.max_depth
         v = rand(rng, [-1, 1])
         if v == -1
             θm, rm, _, _, θ′, r′,n′, s′, α, nα = build_tree(rng, nt, h, θm, rm, logu, v, j, H)
@@ -162,8 +162,7 @@ function propose(rng::AbstractRNG, nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ:
     return θ_new, r_new, α / nα
 end
 
-propose(nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T};
-        j_max::Int=10) where {I<:AbstractIntegrator,T<:Real} = propose(GLOBAL_RNG, nt, h, θ, r; j_max=j_max)
+transition(nt::NoUTurnTrajectory{I}, h::Hamiltonian, θ::AbstractVector{T}, r::AbstractVector{T}) where {I<:AbstractIntegrator,T<:Real} = transition(GLOBAL_RNG, nt, h, θ, r)
 
 function MultinomialNUTS(h::Hamiltonian, θ::AbstractVector{T}) where {T<:Real}
     return MultinomialNUTS(Leapfrog(find_good_eps(h, θ)))
diff --git a/src/sampler.jl b/src/sampler.jl
@@ -4,38 +4,51 @@ function mh_accept(rng::AbstractRNG, H::AbstractFloat, H_new::AbstractFloat)
 end
 mh_accept(H::AbstractFloat, H_new::AbstractFloat) = mh_accept(GLOBAL_RNG, H, H_new) # RNG-free convenience method: forwards both energies to the rng-taking method using GLOBAL_RNG
 
-function sample(h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int; verbose::Bool=true) where {T<:Real}
+sample(h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int; verbose::Bool=true) where {T<:Real} =
+    sample(GLOBAL_RNG, h, prop, θ, n_samples; verbose=verbose)
+
+function sample(rng::AbstractRNG, h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int; verbose::Bool=true) where {T<:Real}
     θs = Vector{Vector{T}}(undef, n_samples)
     Hs = Vector{T}(undef, n_samples)
     αs = Vector{T}(undef, n_samples)
     time = @elapsed for i = 1:n_samples
-        θs[i], Hs[i], αs[i] = step(h, prop, i == 1 ? θ : θs[i-1])
+        θs[i], Hs[i], αs[i] = step(rng, h, prop, i == 1 ? θ : θs[i-1])
     end
-    verbose && @info "Finished sampling with $time (s)" typeof(h) typeof(prop) EBFMI(Hs) mean(αs)
+    verbose && @info "Finished sampling with $time (s)" typeof(h.metric) typeof(prop) EBFMI(Hs) mean(αs)
     return θs
 end
 
-function sample(h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int, adaptor::Adaptation.AbstractAdaptor,
+sample(h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int, adaptor::Adaptation.AbstractAdaptor,
+       n_adapts::Int=min(div(n_samples, 10), 1_000); verbose::Bool=true) where {T<:Real} =
+       sample(GLOBAL_RNG, h, prop, θ, n_samples, adaptor, n_adapts; verbose=verbose)
+
+function sample(rng::AbstractRNG, h::Hamiltonian, prop::AbstractProposal, θ::AbstractVector{T}, n_samples::Int, adaptor::Adaptation.AbstractAdaptor,
                 n_adapts::Int=min(div(n_samples, 10), 1_000); verbose::Bool=true) where {T<:Real}
     θs = Vector{Vector{T}}(undef, n_samples)
     Hs = Vector{T}(undef, n_samples)
     αs = Vector{T}(undef, n_samples)
     time = @elapsed for i = 1:n_samples
-        θs[i], Hs[i], αs[i] = step(h, prop, i == 1 ? θ : θs[i-1])
+        θs[i], Hs[i], αs[i] = step(rng, h, prop, i == 1 ? θ : θs[i-1])
         if i <= n_adapts
             adapt!(adaptor, θs[i], αs[i])
             h, prop = update(h, prop, adaptor)
-            verbose && i == n_adapts && @info "Finished $n_adapts adapation steps" typeof(adaptor) prop.integrator.ϵ h.metric
+            if verbose
+                if i == n_adapts
+                    @info "Finished $n_adapts adapation steps" typeof(adaptor) prop.integrator.ϵ h.metric
+                elseif i % Int(n_adapts / 10) == 0
+                    @info "Adapting $i of $n_adapts steps" typeof(adaptor) prop.integrator.ϵ h.metric
+                end
+            end
         end
     end
-    verbose && @info "Finished $n_samples sampling steps in $time (s)" typeof(h) typeof(prop) EBFMI(Hs) mean(αs)
+    verbose && @info "Finished $n_samples sampling steps in $time (s)" typeof(h.metric) typeof(prop) EBFMI(Hs) mean(αs)
     return θs
 end
 
 function step(rng::AbstractRNG, h::Hamiltonian, prop::TakeLastProposal{I}, θ::AbstractVector{T}) where {T<:Real,I<:AbstractIntegrator}
     r = rand_momentum(rng, h)
     H = hamiltonian_energy(h, θ, r)
-    θ_new, r_new = propose(prop, h, θ, r)
+    θ_new, r_new = transition(prop, h, θ, r)
     H_new = hamiltonian_energy(h, θ_new, r_new)
     # Accept via MH criteria
     is_accept, α = mh_accept(rng, H, H_new)
@@ -47,7 +60,7 @@ end
 
 function step(rng::AbstractRNG, h::Hamiltonian, prop::NUTS{I}, θ::AbstractVector{T}) where {T<:Real,I<:AbstractIntegrator}
     r = rand_momentum(rng, h)
-    θ_new, r_new, α = propose(rng, prop, h, θ, r)
+    θ_new, r_new, α = transition(rng, prop, h, θ, r)
     H_new = hamiltonian_energy(h, θ_new, r_new)
     # We always accept in NUTS
     return θ_new, H_new, α
diff --git a/test/proposal.jl b/test/proposal.jl
@@ -11,12 +11,14 @@ prop = NUTS(Leapfrog(find_good_eps(h, θ_init)))
 r_init = AdvancedHMC.rand_momentum(h)
 
 @testset "Passing random number generator" begin
-    rng = MersenneTwister(1234)
-    θ1, r1 = AdvancedHMC.propose(rng, prop, h, θ_init, r_init)
+    for seed in [1234, 5678, 90]
+        rng = MersenneTwister(seed)
+        θ1, r1 = AdvancedHMC.transition(rng, prop, h, θ_init, r_init)
 
-    rng = MersenneTwister(1234)
-    θ2, r2 = AdvancedHMC.propose(rng, prop, h, θ_init, r_init)
+        rng = MersenneTwister(seed)
+        θ2, r2 = AdvancedHMC.transition(rng, prop, h, θ_init, r_init)
 
-    @test θ1 == θ2
-    @test r1 == r2
+        @test θ1 == θ2
+        @test r1 == r2
+    end
 end