3 changes: 2 additions & 1 deletion src/KLLS.jl
@@ -15,12 +15,13 @@ using NonlinearSolve, LinearSolve
import SciMLBase: ReturnCode

export KLLSModel, SSModel, OTModel, LPModel
-export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet
+export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet, ExpKernel
export solve!, scale!, regularize!, histogram, maximize!, reset!, update_y0!

DEFAULT_PRECISION(T) = (eps(T))^(1/3)

include("logsumexp.jl")
include("exp-kernel.jl")
include("klls-model.jl")
include("ss-model.jl")
include("newtoncg.jl")
65 changes: 65 additions & 0 deletions src/exp-kernel.jl
@@ -0,0 +1,65 @@
struct ExpKernel{T<:AbstractFloat, V1<:AbstractVector{T}, V2<:AbstractVector{T}}
    q::V1 # prior
    g::V2 # buffer for gradient
end

"""
Constructor for the ExpKernel object.

If an n-vector of priors `q` is available:

ExpKernel(q)

If no prior is known, instead provide the dimension `n`:

ExpKernel(n)
"""
function ExpKernel(q::AbstractVector)
@assert (all(ζ -> ζ ≥ 0, q)) "prior is not nonnegative"
ExpKernel(q, similar(q))
end

"""
Evaluate ∑exp, its gradient, and Hessian at `p`:

f = obj!(expk, p)

where `p` is a vector of length `n` and `expk` is a ExpKernel object.
"""
function obj!(expk::ExpKernel{T}, p) where T
@unpack q, g = expk
@. g = q .* exp(p)
@. g = g ./ exp(one(T))
return sum(g)
end

"""
Get the gradient of ∑exp at the point `p` where
the `expk` objective was last evaluated:

g = grad(expk)
"""
grad(expk::ExpKernel) = expk.g

"""
Get the Hessian of ∑exp at the point `p` where
the `expk` objective was last evaluated:

H = hess(expk)
"""
function hess(expk::ExpKernel)
g = expk.g
return Diagonal(g)
end

"""
Get the Hessian vector product of logΣexp at the point `p`
where the `expk` objective was last evaluated:

Hz = hessvp!(expk, z)
"""
function hessvp!(expk::ExpKernel{T}, z::AbstractVector{T}) where T
g = expk.g
z .= g.*z
return z
end
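A minimal usage sketch of the new kernel API (hypothetical values; run from inside the KLLS module, since `obj!`, `grad`, `hess`, and `hessvp!` are not exported; `obj!` must be called first because the other three read the cached gradient buffer):

    using LinearAlgebra

    q = [0.2, 0.5, 0.3]          # nonnegative prior
    expk = ExpKernel(q)
    p = randn(3)

    f  = obj!(expk, p)           # f = ∑ᵢ qᵢ exp(pᵢ - 1); caches the gradient in expk.g
    g  = grad(expk)              # g = q .* exp.(p .- 1)
    H  = hess(expk)              # Diagonal(g)
    Hz = hessvp!(expk, copy(p))  # in-place product H*p
    Hz ≈ H * p                   # true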
6 changes: 3 additions & 3 deletions src/klls-model.jl
@@ -3,14 +3,14 @@ Structure for KLLS model
- `A` is the matrix of constraints
- `b` is the right-hand side
- `q` is the prior (default: uniform)
-- `lse` is the log-sum-exp function
+- `kernel` is the kernel used to regularize (default: LSE for simplex)
- `λ` is the regularization parameter
- `C` is a positive definite scaling matrix
- `w` is an n-buffer for the Hessian-vector product
- `bNrm` is the norm of the right-hand side
- `name` is the name of the problem
"""
-@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}} <: AbstractNLPModel{T, S}
+@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}, K} <: AbstractNLPModel{T, S}
    A::M
    b::SB
    c::S = begin
@@ -29,7 +29,7 @@ Structure for KLLS model
    nbuf::S = similar(q)
    bNrm::T = norm(b)
    scale::T = one(eltype(A))
-    lse::LogExpFunction = LogExpFunction(q)
+    kernel::K = LogExpFunction(q)
    name::String = ""
    meta::NLPModelMeta{T, S} = begin
        m = size(A, 1)
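Since the kernel is now the type parameter `K` instead of a hard-coded `LogExpFunction`, models can be assembled with either kernel through the `@kwdef` keyword constructor. A sketch (the `q` keyword is assumed to match the prior field documented above):

    A, b = rand(10, 20), rand(10)
    q = fill(1/20, 20)                        # uniform prior

    kl_lse = KLLSModel(A=A, b=b)              # default: kernel = LogExpFunction(q)
    kl_exp = KLLSModel(A=A, b=b, q=q, kernel=ExpKernel(q))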
4 changes: 2 additions & 2 deletions src/level-set.jl
@@ -289,7 +289,7 @@ function oracle_callback(
    trace::Bool=false,
) where {T}
    y = solver.x
-    x = kl.scale * grad(kl.lse)
+    x = kl.scale * grad(kl.kernel)
    dObj = -trunk_stats.objective - σ
    iter = trunk_stats.iter
    r = trunk_stats.dual_feas # = ||∇ dual obj(x)||
@@ -322,7 +322,7 @@ function oracle_callback(
    elseif tired
        trunk_stats.status = :max_iter
    elseif pObj < α * dObj && dObj > 0
-        st = -obj!(kl.lse, kl.A'y) + log(kl.scale) + 1
+        st = -obj!(kl.kernel, kl.A'y) + log(kl.scale) + 1
        ret .= [dObj, pObj, st]
        trunk_stats.status = :user # Ends the oracle iterations
    end
12 changes: 12 additions & 0 deletions src/logsumexp.jl
@@ -85,3 +85,15 @@ function hess(lse::LogExpFunction)
    g = lse.g
    return Diagonal(g) - g * g'
end

"""
Get the Hessian-vector product of logΣexp at the point `p`
where the `lse` objective was last evaluated:

    Hz = hessvp!(lse, z)
"""
function hessvp!(lse::LogExpFunction{T}, z::AbstractVector{T}) where T
    g = lse.g
    z .= g .* (z .- dot(g, z))  # (Diagonal(g) - g*g')*z
    return z
end
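At the last evaluation point, ∇²logΣexp = Diag(g) - gg' with g = ∇logΣexp(p), so `hessvp!` can be sanity-checked against the dense Hessian returned by `hess` (a sketch, assuming module-internal access):

    using LinearAlgebra

    lse = LogExpFunction(fill(1/5, 5))
    obj!(lse, randn(5))                                # fills the gradient buffer lse.g
    g, z = grad(lse), randn(5)
    hessvp!(lse, copy(z)) ≈ (Diagonal(g) - g*g') * z   # true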
39 changes: 19 additions & 20 deletions src/newtoncg.jl
@@ -1,27 +1,27 @@
"""
-Compute f(y) = logΣexp(A'y - c) and it's gradient,
-stored in the `lse` internal buffer.
+Compute f(y) = kernel(A'y - c) and its gradient,
+stored in the `kernel` internal buffer.
"""
-function lseatyc!(kl, y)
-    @unpack A, c, nbuf, lse = kl
+function kernelatyc!(kl, y)
+    @unpack A, c, nbuf, kernel = kl
    nbuf .= c
    mul!(nbuf, A', y, 1, -1)
-    return obj!(lse, nbuf)
+    return obj!(kernel, nbuf)
end

"""
Dual objective:

- base case (no scaling, unweighted 2-norm):

-    f(y) = log∑exp(A'y - c) - 0.5λ y∙Cy - b∙y
+    f(y) = kernel(A'y - c) + 0.5λ y∙y - b∙y

- with scaling and weighted 2-norm:

-    f(y) = τ log∑exp(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
+    f(y) = τ kernel(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
"""
function dObj!(kl::KLLSModel, y)
    @unpack b, λ, C, scale = kl
    increment!(kl, :neval_jtprod)
-    f = lseatyc!(kl, y)
+    f = kernelatyc!(kl, y)
    return scale*f - scale*log(scale) + 0.5λ*dot(y, C, y) - b⋅y
end

@@ -30,14 +30,14 @@ NLPModels.obj(kl::KLLSModel, y) = dObj!(kl, y)
"""
Dual objective gradient

-    ∇f(y) = τ A∇log∑exp(A'y-c) + λCy - b
+    ∇f(y) = τ A∇kernel(A'y-c) + λCy - b

evaluated at `y`. Assumes that the objective was last evaluated at the same point `y`.
"""
function dGrad!(kl::KLLSModel, y, ∇f)
-    @unpack A, b, λ, C, lse, scale = kl
+    @unpack A, b, λ, C, kernel, scale = kl
    increment!(kl, :neval_jprod)
-    p = grad(lse)
+    p = grad(kernel)
    ∇f .= -b
    if λ > 0
        mul!(∇f, C, y, λ, 1)
@@ -49,8 +49,8 @@ end
NLPModels.grad!(kl::KLLSModel, y, ∇f) = dGrad!(kl, y, ∇f)

function dHess(kl::KLLSModel)
-    @unpack A, λ, C, lse, scale = kl
-    H = hess(lse)
+    @unpack A, λ, C, kernel, scale = kl
+    H = hess(kernel)
    ∇²dObj = scale*(A*H*A')
    if λ > 0
        ∇²dObj += λ*C
@@ -63,19 +63,18 @@ end

Product of the dual objective Hessian with a vector `z`

-    Hz ← ∇²d(y)z = τ A∇²log∑exp(A'y)Az + λCz,
+    Hz ← ∇²d(y)z = τ A ∇²kernel(A'y - c) A'z + λCz,

where `y` is the point at which the objective was last evaluated.
"""
function dHess_prod!(kl::KLLSModel, z, Hz)
-    @unpack A, λ, C, nbuf, lse, scale = kl
+    @unpack A, λ, C, nbuf, kernel, scale = kl
    w = nbuf
    increment!(kl, :neval_jprod)
    increment!(kl, :neval_jtprod)
-    g = grad(lse)
-    mul!(w, A', z)           # w = A'z
-    w .= g.*(w .- (g⋅w))     # w = (G - gg')(A'z)
-    mul!(Hz, A, w, scale, 0) # v = scale*A(G - gg')(A'z)
+    mul!(w, A', z)            # w = A'z
+    hessvp!(kernel, w)        # w = (∇²k)(A'z)
+    mul!(Hz, A, w, scale, 0)  # Hz = scale*A(∇²k)(A'z)
    if λ > 0
        mul!(Hz, C, z, λ, 1)  # Hz += λCz
    end
@@ -146,7 +145,7 @@ function solve!(
        trunk_stats = SolverCore.solve!(solver, kl; M=M, callback=cb, atol=zero(T), rtol=zero(T))
    end

-    x = kl.scale .* grad(kl.lse)
+    x = (kl.scale).*grad(kl.kernel)

    stats = ExecutionStats(
        trunk_stats.status,
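With the product now routed through the kernel's `hessvp!`, `dHess_prod!` should agree with the dense `dHess` for either kernel. A consistency-check sketch (assumes a small `KLLSModel` `kl`; the dual objective is evaluated first so the kernel buffer is current at `y`):

    y = randn(kl.meta.nvar)
    dObj!(kl, y)                     # evaluates the kernel at A'y - c
    z, Hz = randn(kl.meta.nvar), similar(y)
    dHess_prod!(kl, z, Hz)
    Hz ≈ dHess(kl) * z               # true for LogExpFunction and ExpKernel alike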
2 changes: 1 addition & 1 deletion src/newtonls.jl
@@ -61,7 +61,7 @@ function newton_opt(
        dObj, dGrd, dHes = evaldual(y)

    end
-    return grad(kl.lse), y, tracer
+    return grad(kl.kernel), y, tracer
end

function armijo(f, ∇fx, x, d; μ=1e-5, α=1, ρ=0.5, maxits=10)
6 changes: 3 additions & 3 deletions src/precon.jl
@@ -62,8 +62,8 @@ See also [`mul!`](@ref), [`ldiv!`](@ref), and [`update!`](@ref).
"""
function DiagASAPreconditioner(kl::KLLSModel{T}; α::T=zero(T)) where T
    @unpack A, b, λ = kl
-    obj!(kl.lse, A'b)
-    d = diag_ASA!(similar(b), A, grad(kl.lse), α)
+    obj!(kl.kernel, A'b)
+    d = diag_ASA!(similar(b), A, grad(kl.kernel), α)
    DiagASAPreconditioner(kl, d, α)
end

@@ -92,7 +92,7 @@ function diag_ASA!(d::AbstractVector{T}, A, g, λ) where T
end

function update!(P::DiagASAPreconditioner)
-    d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.lse)
+    d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.kernel)
    diag_ASA!(d, A, g, α)
end

12 changes: 6 additions & 6 deletions src/selfscale.jl
@@ -14,15 +14,15 @@ where
function NLPModels.residual!(ss::SSModel, yt, Fx)
    increment!(ss, :neval_residual)
    kl = ss.kl
-    @unpack A, c, lse = kl
+    @unpack A, c, kernel = kl
    m = kl.meta.nvar

    r = @view Fx[1:m]
    y = @view yt[1:m]
    t = yt[end]

    scale!(kl, t)            # Apply the latest scaling factor
-    f = lseatyc!(kl, y)      # f = logΣexp(A'y). Needed before grad eval
+    f = kernelatyc!(kl, y)   # f = kernel(A'y - c); needed before grad eval
    dGrad!(kl, y, r)         # r ≡ Fx[1:m] = ∇d(y)
    Fx[end] = f - log(t) - 1 # optimal scaling condition
    return Fx
@@ -39,10 +39,10 @@ Compute the Jacobian-vector product,
function NLPModels.jprod_residual!(ss::SSModel, yt, zα, Jyt)

    kl = ss.kl
-    @unpack A, lse, mbuf = kl
+    @unpack A, kernel, mbuf = kl
    Ax = mbuf
    m = kl.meta.nvar
-    x = grad(lse)
+    x = grad(kernel)

    increment!(ss, :neval_jprod_residual)

@@ -108,7 +108,7 @@ function solve!(
    optimality = sqrt(obj(ss, trunk_stats.solution))

    kl = ss.kl
-    x = kl.scale.*grad(kl.lse)
+    x = kl.scale.*grad(kl.kernel)
    y = @view trunk_stats.solution[1:end-1]
    stats = ExecutionStats(
        trunk_stats.status, # status
@@ -276,7 +276,7 @@

    λ = kl.λ
    y = @view nlcache.u[1:m]
-    x = kl.scale.*grad(kl.lse)
+    x = kl.scale.*grad(kl.kernel)
    ∇d = @view nlcache.fu[1:m]
    r = λ*y
    stats = ExecutionStats(
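The last residual component is the first-order optimality condition for the scale t: with the scaled dual objective d(y, t) = t kernel(A'y - c) - t log t + 0.5λ y∙Cy - b∙y from newtoncg.jl, differentiating in t gives

    ∂d/∂t = kernel(A'y - c) - log t - 1,

so `Fx[end] = f - log(t) - 1` in `residual!` above is exactly ∂d/∂t = 0.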
2 changes: 1 addition & 1 deletion src/sequential-scale.jl
@@ -8,7 +8,7 @@ function value!(kl::KLLSModel, t; jprods=Int[0], jtprods=Int[0], kwargs...)
    scale!(kl, t)
    s = solve!(kl; kwargs...)
    y = s.residual/λ
-    dv = obj!(kl.lse, A'y) - log(t) - 1
+    dv = obj!(kl.kernel, A'y) - log(t) - 1
    jprods[1] += neval_jprod(kl)
    jtprods[1] += neval_jtprod(kl)
    update_y0!(kl, s.residual ./ kl.λ) # Set the next run's starting point to the radial projection
2 changes: 1 addition & 1 deletion test/test-precon.jl
@@ -43,7 +43,7 @@ import Krylov: cg

# DiagASAPreconditioner
M = KLLS.DiagASAPreconditioner(data)
-g = KLLS.grad(data.lse)
+g = KLLS.grad(data.kernel)
S = Diagonal(g)
P = Diagonal(A*S*A')
@test all(P*d ≈ mul!(similar(d), M, d))