3 changes: 2 additions & 1 deletion src/KLLS.jl
@@ -15,12 +15,13 @@ using NonlinearSolve, LinearSolve
import SciMLBase: ReturnCode

export KLLSModel, SSModel, OTModel, LPModel
-export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet
+export NewtonEQ, SSTrunkLS, SequentialSolve, LevelSet, AdaptiveLevelSet, ExpKernel
export solve!, scale!, regularize!, histogram, maximize!, reset!, update_y0!

DEFAULT_PRECISION(T) = (eps(T))^(1/3)

include("logsumexp.jl")
include("exp-kernel.jl")
include("klls-model.jl")
include("ss-model.jl")
include("newtoncg.jl")
65 changes: 65 additions & 0 deletions src/exp-kernel.jl
@@ -0,0 +1,65 @@
struct ExpKernel{T<:AbstractFloat, V1<:AbstractVector{T}, V2<:AbstractVector{T}}
    q::V1 # prior
    g::V2 # buffer for gradient
end

"""
Constructor for the ExpKernel object.

If an n-vector of priors `q` is available:

ExpKernel(q)

If no prior is known, instead provide the dimension `n`:

ExpKernel(n)
"""
function ExpKernel(q::AbstractVector)
@assert (all(ζ -> ζ ≥ 0, q)) "prior is not nonnegative"
ExpKernel(q, similar(q))
end

"""
Evaluate ∑exp, its gradient, and Hessian at `p`:

f = obj!(expk, p)

where `p` is a vector of length `n` and `expk` is a ExpKernel object.
"""
function obj!(expk::ExpKernel{T}, p) where T
@unpack q, g = expk
@. g = q .* exp(p)
@. g = g ./ exp(one(T))
return sum(g)
end

"""
Get the gradient of ∑exp at the point `p` where
the `expk` objective was last evaluated:

g = grad(expk)
"""
grad(expk::ExpKernel) = expk.g

"""
Get the Hessian of ∑exp at the point `p` where
the `expk` objective was last evaluated:

H = hess(expk)
"""
function hess(expk::ExpKernel)
g = expk.g
return Diagonal(g)
end

"""
Get the Hessian vector product of logΣexp at the point `p`
where the `expk` objective was last evaluated:

Hz = hessvp!(expk, z)
"""
function hessvp!(expk::ExpKernel{T}, z::AbstractVector{T}) where T
g = expk.g
z .= g.*z
return z
end
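A minimal usage sketch of the new kernel API (hypothetical values; run from inside the KLLS module, since `obj!`, `grad`, `hess`, and `hessvp!` are not exported; `obj!` must be called first because the other three read the cached gradient buffer):

    using LinearAlgebra

    q = [0.2, 0.5, 0.3]          # nonnegative prior
    expk = ExpKernel(q)
    p = randn(3)

    f  = obj!(expk, p)           # f = ∑ᵢ qᵢ exp(pᵢ - 1); caches the gradient in expk.g
    g  = grad(expk)              # g = q .* exp.(p .- 1)
    H  = hess(expk)              # Diagonal(g)
    Hz = hessvp!(expk, copy(p))  # in-place product H*p
    Hz ≈ H * p                   # true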
6 changes: 3 additions & 3 deletions src/klls-model.jl
@@ -3,14 +3,14 @@ Structure for KLLS model
- `A` is the matrix of constraints
- `b` is the right-hand side
- `q` is the prior (default: uniform)
-- `lse` is the log-sum-exp function
+- `kernel` is the kernel used to regularize (default: LSE for simplex)
- `λ` is the regularization parameter
- `C` is a positive definite scaling matrix
- `w` is an n-buffer for the Hessian-vector product
- `bNrm` is the norm of the right-hand side
- `name` is the name of the problem
"""
-@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}} <: AbstractNLPModel{T, S}
+@kwdef mutable struct KLLSModel{T<:AbstractFloat, M, CT, SB<:AbstractVector{T}, S<:AbstractVector{T}, K} <: AbstractNLPModel{T, S}
    A::M
    b::SB
    c::S = begin
@@ -29,7 +29,7 @@ Structure for KLLS model
    nbuf::S = similar(q)
    bNrm::T = norm(b)
    scale::T = one(eltype(A))
-    lse::LogExpFunction = LogExpFunction(q)
+    kernel::K = LogExpFunction(q)
    name::String = ""
    meta::NLPModelMeta{T, S} = begin
        m = size(A, 1)
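Since the kernel is now the type parameter `K` instead of a hard-coded `LogExpFunction`, models can be assembled with either kernel through the `@kwdef` keyword constructor. A sketch (the `q` keyword is assumed to match the prior field documented above):

    A, b = rand(10, 20), rand(10)
    q = fill(1/20, 20)                        # uniform prior

    kl_lse = KLLSModel(A=A, b=b)              # default: kernel = LogExpFunction(q)
    kl_exp = KLLSModel(A=A, b=b, q=q, kernel=ExpKernel(q))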
4 changes: 2 additions & 2 deletions src/level-set.jl
@@ -289,7 +289,7 @@ function oracle_callback(
    trace::Bool=false,
) where {T}
    y = solver.x
-    x = kl.scale * grad(kl.lse)
+    x = kl.scale * grad(kl.kernel)
    dObj = -trunk_stats.objective - σ
    iter = trunk_stats.iter
    r = trunk_stats.dual_feas # = ||∇ dual obj(x)||
@@ -322,7 +322,7 @@ function oracle_callback(
    elseif tired
        trunk_stats.status = :max_iter
    elseif pObj < α * dObj && dObj > 0
-        st = -obj!(kl.lse, kl.A'y) + log(kl.scale) + 1
+        st = -obj!(kl.kernel, kl.A'y) + log(kl.scale) + 1
        ret .= [dObj, pObj, st]
        trunk_stats.status = :user # Ends the oracle iterations
    end
12 changes: 12 additions & 0 deletions src/logsumexp.jl
@@ -85,3 +85,15 @@ function hess(lse::LogExpFunction)
    g = lse.g
    return Diagonal(g) - g * g'
end

"""
Get the Hessian-vector product of logΣexp at the point `p`
where the `lse` objective was last evaluated:

    Hz = hessvp!(lse, z)
"""
function hessvp!(lse::LogExpFunction{T}, z::AbstractVector{T}) where T
    g = lse.g
    z .= g .* (z .- dot(g, z))  # (Diagonal(g) - g*g')*z
    return z
end
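At the last evaluation point, ∇²logΣexp = Diag(g) - gg' with g = ∇logΣexp(p), so `hessvp!` can be sanity-checked against the dense Hessian returned by `hess` (a sketch, assuming module-internal access):

    using LinearAlgebra

    lse = LogExpFunction(fill(1/5, 5))
    obj!(lse, randn(5))                                # fills the gradient buffer lse.g
    g, z = grad(lse), randn(5)
    hessvp!(lse, copy(z)) ≈ (Diagonal(g) - g*g') * z   # true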
39 changes: 19 additions & 20 deletions src/newtoncg.jl
@@ -1,27 +1,27 @@
"""
-Compute f(y) = logΣexp(A'y - c) and it's gradient,
-stored in the `lse` internal buffer.
+Compute f(y) = kernel(A'y - c) and its gradient,
+stored in the `kernel` internal buffer.
"""
-function lseatyc!(kl, y)
-    @unpack A, c, nbuf, lse = kl
+function kernelatyc!(kl, y)
+    @unpack A, c, nbuf, kernel = kl
    nbuf .= c
    mul!(nbuf, A', y, 1, -1)
-    return obj!(lse, nbuf)
+    return obj!(kernel, nbuf)
end

"""
Dual objective:

- base case (no scaling, unweighted 2-norm):

-    f(y) = log∑exp(A'y - c) - 0.5λ y∙Cy - b∙y
+    f(y) = kernel(A'y - c) + 0.5λ y∙y - b∙y

- with scaling and weighted 2-norm:

-    f(y) = τ log∑exp(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
+    f(y) = τ kernel(A'y - c) - τ log τ + 0.5λ y∙Cy - b∙y
"""
function dObj!(kl::KLLSModel, y)
    @unpack b, λ, C, scale = kl
    increment!(kl, :neval_jtprod)
-    f = lseatyc!(kl, y)
+    f = kernelatyc!(kl, y)
    return scale*f - scale*log(scale) + 0.5λ*dot(y, C, y) - b⋅y
end

@@ -30,14 +30,14 @@ NLPModels.obj(kl::KLLSModel, y) = dObj!(kl, y)
"""
Dual objective gradient

-    ∇f(y) = τ A∇log∑exp(A'y-c) + λCy - b
+    ∇f(y) = τ A∇kernel(A'y-c) + λCy - b

evaluated at `y`. Assumes that the objective was last evaluated at the same point `y`.
"""
function dGrad!(kl::KLLSModel, y, ∇f)
-    @unpack A, b, λ, C, lse, scale = kl
+    @unpack A, b, λ, C, kernel, scale = kl
    increment!(kl, :neval_jprod)
-    p = grad(lse)
+    p = grad(kernel)
    ∇f .= -b
    if λ > 0
        mul!(∇f, C, y, λ, 1)
@@ -49,8 +49,8 @@ end
NLPModels.grad!(kl::KLLSModel, y, ∇f) = dGrad!(kl, y, ∇f)

function dHess(kl::KLLSModel)
-    @unpack A, λ, C, lse, scale = kl
-    H = hess(lse)
+    @unpack A, λ, C, kernel, scale = kl
+    H = hess(kernel)
    ∇²dObj = scale*(A*H*A')
    if λ > 0
        ∇²dObj += λ*C
@@ -63,19 +63,18 @@ end

Product of the dual objective Hessian with a vector `z`

-    Hz ← ∇²d(y)z = τ A∇²log∑exp(A'y)Az + λCz,
+    Hz ← ∇²d(y)z = τ A ∇²kernel(A'y - c) A'z + λCz,

where `y` is the point at which the objective was last evaluated.
"""
function dHess_prod!(kl::KLLSModel, z, Hz)
-    @unpack A, λ, C, nbuf, lse, scale = kl
+    @unpack A, λ, C, nbuf, kernel, scale = kl
    w = nbuf
    increment!(kl, :neval_jprod)
    increment!(kl, :neval_jtprod)
-    g = grad(lse)
-    mul!(w, A', z)           # w = A'z
-    w .= g.*(w .- (g⋅w))     # w = (G - gg')(A'z)
-    mul!(Hz, A, w, scale, 0) # v = scale*A(G - gg')(A'z)
+    mul!(w, A', z)            # w = A'z
+    hessvp!(kernel, w)        # w = (∇²k)(A'z)
+    mul!(Hz, A, w, scale, 0)  # Hz = scale*A(∇²k)(A'z)
    if λ > 0
        mul!(Hz, C, z, λ, 1)  # Hz += λCz
    end
@@ -146,7 +145,7 @@ function solve!(
        trunk_stats = SolverCore.solve!(solver, kl; M=M, callback=cb, atol=zero(T), rtol=zero(T))
    end

-    x = kl.scale .* grad(kl.lse)
+    x = (kl.scale).*grad(kl.kernel)

    stats = ExecutionStats(
        trunk_stats.status,
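With the product now routed through the kernel's `hessvp!`, `dHess_prod!` should agree with the dense `dHess` for either kernel. A consistency-check sketch (assumes a small `KLLSModel` `kl`; the dual objective is evaluated first so the kernel buffer is current at `y`):

    y = randn(kl.meta.nvar)
    dObj!(kl, y)                     # evaluates the kernel at A'y - c
    z, Hz = randn(kl.meta.nvar), similar(y)
    dHess_prod!(kl, z, Hz)
    Hz ≈ dHess(kl) * z               # true for LogExpFunction and ExpKernel alike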
2 changes: 1 addition & 1 deletion src/newtonls.jl
@@ -61,7 +61,7 @@ function newton_opt(
        dObj, dGrd, dHes = evaldual(y)

    end
-    return grad(kl.lse), y, tracer
+    return grad(kl.kernel), y, tracer
end

function armijo(f, ∇fx, x, d; μ=1e-5, α=1, ρ=0.5, maxits=10)
6 changes: 3 additions & 3 deletions src/precon.jl
@@ -62,8 +62,8 @@ See also [`mul!`](@ref), [`ldiv!`](@ref), and [`update!`](@ref).
"""
function DiagASAPreconditioner(kl::KLLSModel{T}; α::T=zero(T)) where T
    @unpack A, b, λ = kl
-    obj!(kl.lse, A'b)
-    d = diag_ASA!(similar(b), A, grad(kl.lse), α)
+    obj!(kl.kernel, A'b)
+    d = diag_ASA!(similar(b), A, grad(kl.kernel), α)
    DiagASAPreconditioner(kl, d, α)
end

@@ -92,7 +92,7 @@ function diag_ASA!(d::AbstractVector{T}, A, g, λ) where T
end

function update!(P::DiagASAPreconditioner)
-    d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.lse)
+    d = P.d; A = P.kl.A; α = P.α; g = grad(P.kl.kernel)
    diag_ASA!(d, A, g, α)
end

12 changes: 6 additions & 6 deletions src/selfscale.jl
@@ -14,15 +14,15 @@ where
function NLPModels.residual!(ss::SSModel, yt, Fx)
    increment!(ss, :neval_residual)
    kl = ss.kl
-    @unpack A, c, lse = kl
+    @unpack A, c, kernel = kl
    m = kl.meta.nvar

    r = @view Fx[1:m]
    y = @view yt[1:m]
    t = yt[end]

    scale!(kl, t)            # Apply the latest scaling factor
-    f = lseatyc!(kl, y)      # f = logΣexp(A'y). Needed before grad eval
+    f = kernelatyc!(kl, y)   # f = kernel(A'y - c); needed before grad eval
    dGrad!(kl, y, r)         # r ≡ Fx[1:m] = ∇d(y)
    Fx[end] = f - log(t) - 1 # optimal scaling condition
    return Fx
@@ -39,10 +39,10 @@ Compute the Jacobian-vector product,
function NLPModels.jprod_residual!(ss::SSModel, yt, zα, Jyt)

    kl = ss.kl
-    @unpack A, lse, mbuf = kl
+    @unpack A, kernel, mbuf = kl
    Ax = mbuf
    m = kl.meta.nvar
-    x = grad(lse)
+    x = grad(kernel)

    increment!(ss, :neval_jprod_residual)

@@ -108,7 +108,7 @@ function solve!(
    optimality = sqrt(obj(ss, trunk_stats.solution))

    kl = ss.kl
-    x = kl.scale.*grad(kl.lse)
+    x = kl.scale.*grad(kl.kernel)
    y = @view trunk_stats.solution[1:end-1]
    stats = ExecutionStats(
        trunk_stats.status, # status
@@ -276,7 +276,7 @@

    λ = kl.λ
    y = @view nlcache.u[1:m]
-    x = kl.scale.*grad(kl.lse)
+    x = kl.scale.*grad(kl.kernel)
    ∇d = @view nlcache.fu[1:m]
    r = λ*y
    stats = ExecutionStats(
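The last residual component is the first-order optimality condition for the scale t: with the scaled dual objective d(y, t) = t kernel(A'y - c) - t log t + 0.5λ y∙Cy - b∙y from newtoncg.jl, differentiating in t gives

    ∂d/∂t = kernel(A'y - c) - log t - 1,

so `Fx[end] = f - log(t) - 1` in `residual!` above is exactly ∂d/∂t = 0.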
2 changes: 1 addition & 1 deletion src/sequential-scale.jl
@@ -8,7 +8,7 @@ function value!(kl::KLLSModel, t; jprods=Int[0], jtprods=Int[0], kwargs...)
    scale!(kl, t)
    s = solve!(kl; kwargs...)
    y = s.residual/λ
-    dv = obj!(kl.lse, A'y) - log(t) - 1
+    dv = obj!(kl.kernel, A'y) - log(t) - 1
    jprods[1] += neval_jprod(kl)
    jtprods[1] += neval_jtprod(kl)
    update_y0!(kl, s.residual ./ kl.λ) # Set the next run's starting point to the radial projection
2 changes: 1 addition & 1 deletion test/test-precon.jl
@@ -43,7 +43,7 @@ import Krylov: cg

# DiagASAPreconditioner
M = KLLS.DiagASAPreconditioner(data)
-g = KLLS.grad(data.lse)
+g = KLLS.grad(data.kernel)
S = Diagonal(g)
P = Diagonal(A*S*A')
@test all(P*d ≈ mul!(similar(d), M, d))