diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..306b813 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ + +Manifest.toml + +.vscode/ diff --git a/example.jl b/example.jl index 8a7dd7b..589f450 100644 --- a/example.jl +++ b/example.jl @@ -54,4 +54,5 @@ function NLPModels.hprod!(nlp :: ExampleMONLP, i :: Integer, x :: AbstractVector Hv[2] = -400 * x[1] * v[1] + 200 * v[2] end return Hv -end \ No newline at end of file +end + diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index 20bdc52..2783a7b 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -1,9 +1,21 @@ -module MultObjNLPModels - -using NLPModels - -include("meta.jl") -include("counters.jl") -include("api.jl") - -end # module +module MultObjNLPModels + +# stdlib +using SparseArrays + +# external packages +using FastClosures + +# JSO packages +using LinearOperators, NLPModels + +include("api.jl") # Defines the model and the functions +include("meta.jl") # Defines the derived meta structure +include("counters.jl") + +include("auxiliary.jl") + +include("regression_model.jl") +include("sthocastic_gradient.jl") + +end # module diff --git a/src/api.jl b/src/api.jl index edede09..2f7a25e 100644 --- a/src/api.jl +++ b/src/api.jl @@ -1,141 +1,243 @@ -export AbstractMultObjModel, - obj, grad!, hess_structure!, hess_coord!, hprod!, - increment! - -""" - AbstractMultObjModel - -`AbstractMultObjModel` is an abstract type for problems with multiple objective. -The default way of looking into these models is by defining the objective - -```math -f(x) = ∑ᵢ σᵢ fᵢ(x) -```` - -An `AbstractMultObjModel` is expected to have: -- `meta :: MultObjNLPMeta` -""" -abstract type AbstractMultObjModel <: AbstractNLPModel end - -sum_counters(nls :: AbstractMultObjModel) = sum_counters(nls.counters) - -import Base.show -show_header(io :: IO, nlp :: AbstractMultObjModel) = println(io, typeof(nlp)) - -function show(io :: IO, nlp :: AbstractMultObjModel) - show_header(io, nlp) - show(io, nlp.meta) - show(io, nlp.counters) -end - -for counter in fieldnames(MultObjCounters) - counter == :counters && continue - @eval begin - """ - $($counter)(nlp) - - Get the number of `$(split("$($counter)", "_")[2])` evaluations. - """ - $counter(nls :: AbstractMultObjModel) = nls.counters.$counter - export $counter - end -end - -for counter in fieldnames(Counters) - @eval begin - $counter(nls :: AbstractMultObjModel) = nls.counters.counters.$counter - export $counter - end -end - -""" - increment!(nlp, s, i) - -Increment counter `s[i]` of problem `nlp`. -""" -function increment!(nlp :: AbstractMultObjModel, s :: Symbol, i :: Integer) - getproperty(nlp.counters, s)[i] += 1 -end - -function NLPModels.reset!(nls :: AbstractMultObjModel) - reset!(nls.counters) - return nls -end - -# New multiple objective functions: - -""" - f = obj(nlp, i, x) - -Evaluate ``fᵢ(x)``, the i-th objective function of `nlp` at `x`. -""" -function obj end - -""" - g = grad!(nlp, i, x, g) - -Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x` in place. -""" -function grad! end - -""" - hess_structure!(nlp, i, rows, cols) - -Return the structure of the Lagrangian Hessian in sparse coordinate format in place. -""" -function hess_structure! end - -""" - vals = hess_coord!(nlp, i, x, vals) - -Evaluate the i-th objective Hessian at `x` in sparse coordinate format, -i.e., ``∇²fᵢ(x)``, rewriting `vals`. -Only the lower triangle is returned. -""" -function hess_coord! 
end
-
-"""
-    Hv = hprod!(nlp, i, x, v, Hv)
-
-Evaluate the product of the i-th objective Hessian at `x` with the vector `v` in
-place, where the objective Hessian is ``∇²fᵢ(x)``.
-"""
-function hprod! end
-
-# NLPModels functions
-
-function NLPModels.obj(nlp :: AbstractMultObjModel, x :: AbstractVector)
-  return sum(obj(nlp, i, x) * nlp.meta.weights[i] for i = 1:nlp.meta.nobj)
-end
-
-function NLPModels.grad!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, g :: AbstractVector) where T
-  fill!(g, zero(T))
-  gi = fill!(similar(g), zero(T))
-  for i = 1:nlp.meta.nobj
-    grad!(nlp, i, x, gi)
-    g .+= nlp.meta.weights[i] * gi
-  end
-  return g
-end
-
-function NLPModels.hess_structure!(nlp :: AbstractMultObjModel, hrows :: AbstractVector{<: Integer}, hcols :: AbstractVector{<: Integer})
-end
-
-function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, hvals :: AbstractVector; obj_weight :: Real=one(T)) where T
-end
-
-function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, hvals :: AbstractVector; obj_weight :: Real=one(T)) where T
-end
-
-function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T
-  fill!(Hv, zero(T))
-  Hiv = fill!(similar(Hv), zero(T))
-  for i = 1:nlp.meta.nobj
-    hprod!(nlp, i, x, v, Hiv)
-    Hv .+= nlp.meta.weights[i] * Hiv
-  end
-  return Hv
-end
-
-function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T
+export AbstractMultObjModel
+
+"""
+    AbstractMultObjModel
+
+`AbstractMultObjModel` is an abstract type for problems with multiple objectives.
+The default way of evaluating these models is through the weighted objective
+
+```math
+f(x) = ∑ᵢ σᵢ fᵢ(x)
+```
+
+where `σᵢ` are the objective weights stored in `nlp.meta.weights`.
+
+An `AbstractMultObjModel` is expected to have:
+- `meta :: MultObjNLPMeta`
+- `counters :: MultObjCounters`
+"""
+abstract type AbstractMultObjModel <: AbstractNLPModel end
+
+# New multiple objective functions:
+
+"""
+    f = obj(nlp, i, x)
+
+Evaluate ``fᵢ(x)``, the i-th objective function of `nlp` at `x`.
+"""
+NLPModels.obj(nlp, i, x)
+
+"""
+    g = grad!(nlp, i, x, g)
+
+Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x` in place.
+"""
+NLPModels.grad!(nlp, i, x, g)
+
+"""
+    g = grad(nlp, i, x)
+
+Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x`.
+"""
+function NLPModels.grad(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector)
+  @lencheck nlp.meta.nvar x
+  NLPModels.@rangecheck 1 nlp.meta.nobj i
+  g = zeros(eltype(x), nlp.meta.nvar)
+  grad!(nlp, i, x, g)
+end
+
+"""
+    grad!(nlp, J, x, g)
+
+Evaluate ``∑ᵢ∈J σᵢ ∇fᵢ(x)``, the weighted sum of the gradients of the objectives indexed by `J`, at `x` and in place.
+Also known as the batch gradient with batch `J`.
+"""
+function NLPModels.grad!(nlp :: AbstractMultObjModel, J :: AbstractVector{<: Integer}, x :: AbstractVector{T}, g :: AbstractVector) where T
+  @lencheck nlp.meta.nvar x g
+  fill!(g, zero(T))
+  gi = fill!(similar(g), zero(T))
+  for i in J
+    grad!(nlp, i, x, gi)
+    g .+= nlp.meta.weights[i] * gi
+  end
+  return g
+end
+
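For illustration (not part of the patch), here is a minimal sketch of a concrete model with two hypothetical objectives f₁(x) = ‖x‖²/2 and f₂(x) = (x₁ - 1)²/2. Implementing only the per-objective `obj` and `grad!` is enough for the weighted `NLPModels.obj` and the batch `grad!` above to work.

```julia
using NLPModels, LinearAlgebra
using MultObjNLPModels

mutable struct TwoObjModel <: AbstractMultObjModel
  meta :: MultObjNLPMeta
  counters :: MultObjCounters
end

# n variables, 2 objectives, default weights σ = [1, 1].
TwoObjModel(n) = TwoObjModel(MultObjNLPMeta(n, 2), MultObjCounters(2))

function NLPModels.obj(nlp :: TwoObjModel, i :: Integer, x :: AbstractVector)
  increment!(nlp, :neval_obji, i)
  return i == 1 ? dot(x, x) / 2 : (x[1] - 1)^2 / 2
end

function NLPModels.grad!(nlp :: TwoObjModel, i :: Integer, x :: AbstractVector, g :: AbstractVector)
  increment!(nlp, :neval_gradi, i)
  fill!(g, zero(eltype(x)))
  i == 1 ? (g .= x) : (g[1] = x[1] - 1)
  return g
end

nlp = TwoObjModel(3)
x = ones(3)
NLPModels.obj(nlp, x)            # σ₁ f₁(x) + σ₂ f₂(x)
grad!(nlp, [1, 2], x, zeros(3))  # batch gradient over J = [1, 2]
```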
+""" +function NLPModels.grad!(nlp :: AbstractMultObjModel, J :: AbstractVector{<: Integer}, x :: AbstractVector{T}) where T + g = similar(x) + return grad!(nlp, J, x, g) +end + +""" + hess_structure!(nlp, i, rows, cols) + +Return the structure of the Lagrangian Hessian in sparse coordinate format in place. +""" +NLPModels.hess_structure!(nlp, i, rows, cols) + +""" + hess_structure!(nlp, i) + +Return the structure of the Lagrangian Hessian in sparse coordinate format. +""" +function hess_structure(nlp :: AbstractMultObjModel, i :: Integer) + NLPModels.@rangecheck 1 nlp.meta.nobj i + rows = zeros(Int, nlp.meta.nnzhi[i]) + cols = zeros(Int, nlp.meta.nnzhi[i]) + hess_structure!(nlp, i, rows, cols) +end + +""" + vals = hess_coord!(nlp, i, x, vals) + +Evaluate the i-th objective Hessian at `x` in sparse coordinate format, +i.e., ``∇²fᵢ(x)``, rewriting `vals`. +Only the lower triangle is returned. +""" +NLPModels.hess_coord!(nlp, i, x, vals) + +""" + vals = hess_coord(nlp, i, x) + +Evaluate the i-th objective Hessian at `x` in sparse coordinate format, +i.e., ``∇²fᵢ(x)``. +Only the lower triangle is returned. +""" +function hess_coord(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + vals = zeros(eltype(x), nlp.meta.nnzhi[i]) + hess_coord!(nlp, i, x, vals) +end + +""" + Hv = hprod!(nlp, i, x, v, Hv) + +Evaluate the product of the i-th objective Hessian at `x` with the vector `v` in +place, where the objective Hessian is ``∇²fᵢ(x)``. +""" +NLPModels.hprod!(nlp, i, x, v, Hv) + +""" + Hv = hprod(nlp, i, x, v) + +Evaluate the product of the i-th objective Hessian at `x` with the vector `v`, +where the objective Hessian is ``∇²fᵢ(x)``. +""" +function hprod(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector, v :: AbstractVector) + @lencheck nlp.meta.nvar x v + NLPModels.@rangecheck 1 nlp.meta.nobj i + Hv = zeros(eltype(x), nlp.meta.nvar) + hprod!(nlp, i, x, v, Hv) +end + +""" + Hx = hess(nlp, i, x) + +Evaluate the i-th objective Hessian at `x` as a sparse matrix. +Only the lower triangle is returned. +""" +function NLPModels.hess(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + rows, cols = hess_structure(nlp, i) + vals = hess_coord(nlp, i, x) + sparse(rows, cols, vals, nlp.meta.nvar, nlp.meta.nvar) +end + +""" + H = hess_op(nlp, i, x) + +Return the objective Hessian at `x` as a linear operator. +The resulting object may be used as if it were a +matrix, e.g., `H * v`. +""" +function hess_op(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + prod = @closure v -> hprod(nlp, i, x, v) + return LinearOperator{eltype(x)}(nlp.meta.nvar, nlp.meta.nvar, + true, true, prod, prod, prod) +end + +""" + H = hess_op!(nlp, i, x, Hv) + +Return the objective Hessian at `x` as a linear operator. +The resulting object may be used as if it were a matrix, e.g., `H * v`. +The vector `Hv` is used as preallocated storage for the operation. 
+""" +function hess_op!(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector, Hv :: AbstractVector) + @lencheck nlp.meta.nvar x Hv + NLPModels.@rangecheck 1 nlp.meta.nobj i + prod = @closure v -> hprod!(nlp, i, x, v, Hv) + return LinearOperator{eltype(x)}(nlp.meta.nvar, nlp.meta.nvar, + true, true, prod, prod, prod) +end + +# NLPModels functions + +function NLPModels.obj(nlp :: AbstractMultObjModel, x :: AbstractVector) + @lencheck nlp.meta.nvar x + return sum(obj(nlp, i, x) * nlp.meta.weights[i] for i = 1:nlp.meta.nobj) +end + +function NLPModels.grad!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, g :: AbstractVector) where T + @lencheck nlp.meta.nvar x g + fill!(g, zero(T)) + gi = fill!(similar(g), zero(T)) + for i = 1:nlp.meta.nobj + grad!(nlp, i, x, gi) + g .+= nlp.meta.weights[i] * gi + end + return g +end + +# ∇²f(x) = ∑ᵢ σᵢ ∇²fᵢ(x) +# rows = [rows₁, …, rowsₚ] +# cols = [cols₁, …, colsₚ] +function NLPModels.hess_structure!(nlp :: AbstractMultObjModel, rows :: AbstractVector{<: Integer}, cols :: AbstractVector{<: Integer}) + @lencheck nlp.meta.nnzh rows cols + c = 0 + for i = 1:nlp.meta.nobj + nnzi = nlp.meta.nnzhi[i] + idx = c .+ (1:nnzi) + @views hess_structure!(nlp, i, rows[idx], cols[idx]) + c += nnzi + end + return rows, cols +end + +function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, vals :: AbstractVector; obj_weight :: Real=one(T)) where T + @lencheck nlp.meta.nvar x + @lencheck nlp.meta.nnzh vals + c = 0 + for i = 1:nlp.meta.nobj + nnzi = nlp.meta.nnzhi[i] + idx = c .+ (1:nnzi) + @views hess_coord!(nlp, i, x, vals[idx]) + vals[idx] .*= obj_weight * nlp.meta.weights[i] + c += nnzi + end + return vals +end + +function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, vals :: AbstractVector; obj_weight :: Real=one(T)) where T +end + +function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T + @lencheck nlp.meta.nvar x v Hv + fill!(Hv, zero(T)) + Hiv = fill!(similar(Hv), zero(T)) + for i = 1:nlp.meta.nobj + hprod!(nlp, i, x, v, Hiv) + Hv .+= nlp.meta.weights[i] * Hiv + end + return Hv +end + +function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T end \ No newline at end of file diff --git a/src/auxiliary.jl b/src/auxiliary.jl new file mode 100644 index 0000000..c1904be --- /dev/null +++ b/src/auxiliary.jl @@ -0,0 +1,61 @@ +export increment! + +sum_counters(nls :: AbstractMultObjModel) = sum_counters(nls.counters) + +import Base.show +show_header(io :: IO, nlp :: AbstractMultObjModel) = println(io, typeof(nlp)) + +function show(io :: IO, nlp :: AbstractMultObjModel) + show_header(io, nlp) + show(io, nlp.meta) + show(io, nlp.counters) +end + +for counter in fieldnames(MultObjCounters) + counter == :counters && continue + @eval begin + """ + $($counter)(nlp) + + Get the number of `$(split("$($counter)", "_")[2])` evaluations. + """ + $counter(nls :: AbstractMultObjModel) = nls.counters.$counter + export $counter + end +end + +for counter in fieldnames(Counters) + @eval begin + NLPModels.$counter(nls :: AbstractMultObjModel) = nls.counters.counters.$counter + export $counter + end +end + +import NLPModels.increment! + +""" + increment!(nlp, s, i) + +Increment counter `s[i]` of problem `nlp`. 
+""" +function increment!(nlp :: AbstractMultObjModel, s :: Symbol, i :: Integer) + getproperty(nlp.counters, s)[i] += 1 +end + +function NLPModels.increment!(nlp :: AbstractMultObjModel, s :: Symbol) + setproperty!(nlp.counters.counters, s, getproperty(nlp.counters.counters, s) + 1) +end + +function NLPModels.reset!(nls :: AbstractMultObjModel) + reset!(nls.counters) + return nls +end + +# TODO: Make these functions more general inside NLPModels.jl: + +NLPModels.has_bounds(meta::MultObjNLPMeta) = length(meta.ifree) < meta.nvar +NLPModels.bound_constrained(meta::MultObjNLPMeta) = meta.ncon == 0 && has_bounds(meta) +NLPModels.unconstrained(meta::MultObjNLPMeta) = meta.ncon == 0 && !has_bounds(meta) +NLPModels.linearly_constrained(meta::MultObjNLPMeta) = meta.nlin == meta.ncon > 0 +NLPModels.equality_constrained(meta::MultObjNLPMeta) = length(meta.jfix) == meta.ncon > 0 +NLPModels.inequality_constrained(meta::MultObjNLPMeta) = meta.ncon > 0 && length(meta.jfix) == 0 \ No newline at end of file diff --git a/src/counters.jl b/src/counters.jl index f7284d3..5cc517c 100644 --- a/src/counters.jl +++ b/src/counters.jl @@ -1,48 +1,36 @@ -export MultObjCounters - -mutable struct MultObjCounters - counters :: Counters - neval_obji :: Vector{Int} # Number of individual objective evaluations - neval_gradi :: Vector{Int} # Number of individual objective gradient evaluations. - neval_hessi :: Vector{Int} # Number of individual objective Hessian evaluations. - neval_hiprod :: Vector{Int} # Number of individual objective Hessian-vector products. - - function MultObjCounters(nobj) - return new(Counters(), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj)) - end -end - -import Base.getproperty, Base.setproperty! -function getproperty(c :: MultObjCounters, f :: Symbol) - if f in fieldnames(Counters) - getfield(c.counters, f) - else - getfield(c, f) - end -end - -function setproperty!(c :: MultObjCounters, f :: Symbol, x) - if f in fieldnames(Counters) - setfield!(c.counters, f, x) - else - setfield!(c, f, x) - end -end - -function sum_counters(c :: MultObjCounters) - s = sum_counters(c.counters) - for field in fieldnames(MultObjCounters) - field == :counters && continue - s += sum(getfield(c, field)) - end - return s -end - -function NLPModels.reset!(c :: MultObjCounters) - for f in fieldnames(MultObjCounters) - f == :counters && continue - fill!(getfield(c, f), 0) - end - reset!(c.counters) - return c -end +export MultObjCounters + +mutable struct MultObjCounters + counters :: Counters + neval_obji :: Vector{Int} # Number of individual objective evaluations + neval_gradi :: Vector{Int} # Number of individual objective gradient evaluations. + neval_hessi :: Vector{Int} # Number of individual objective Hessian evaluations. + neval_hiprod :: Vector{Int} # Number of individual objective Hessian-vector products. + + function MultObjCounters(nobj) + return new(Counters(), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj)) + end +end + +import Base.getproperty, Base.setproperty! 
+ +function sum_counters(c :: MultObjCounters) + s = 0 + for field in fieldnames(Counters) + s += getfield(c.counters, field) + end + for field in fieldnames(MultObjCounters) + field == :counters && continue + s += sum(getfield(c, field)) + end + return s +end + +function NLPModels.reset!(c :: MultObjCounters) + for f in fieldnames(MultObjCounters) + f == :counters && continue + fill!(getfield(c, f), 0) + end + reset!(c.counters) + return c +end diff --git a/src/linear-regression-model.jl b/src/linear-regression-model.jl new file mode 100644 index 0000000..9490c58 --- /dev/null +++ b/src/linear-regression-model.jl @@ -0,0 +1,49 @@ +export LinearRegressionModel + +mutable struct LinearRegressionModel <: AbstractRegressionModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function LinearRegressionModel(X, y) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + + h(x, β) = dot(x, β) + ℓ(y, ŷ) = (y - ŷ)^2 / 2 + return LinearRegressionModel(meta, counters, X, y, h, ℓ) +end + +function NLPModels.obj(nlp :: LinearRegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + return @views (nlp.y[i] - dot(nlp.X[i,:], β)) ^ 2 / 2 +end + +function NLPModels.grad!(nlp :: LinearRegressionModel, i :: Integer, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + xi = @view nlp.X[i,:] + g .= (dot(xi, β) - nlp.y[i]) * xi + return g +end + +function NLPModels.grad!(nlp :: LinearRegressionModel, J :: AbstractVector{<:Integer}, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + for i in J + increment!(nlp, :neval_gradi, i) + end + XJ = @view nlp.X[J,:] + yJ = @view nlp.y[J] + g .= XJ' * (XJ * β - yJ) + return g +end \ No newline at end of file diff --git a/src/logistic-regression-model.jl b/src/logistic-regression-model.jl new file mode 100644 index 0000000..f86fe2f --- /dev/null +++ b/src/logistic-regression-model.jl @@ -0,0 +1,52 @@ +export LogisticRegressionModel + +mutable struct LogisticRegressionModel <: AbstractRegressionModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function LogisticRegressionModel(X, y) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + + h(x, β) = 1 / (1 + exp(-dot(x, β))) + ℓ(y, ŷ) = -y * log(ŷ + 1e-8) - (1 - y) * log(1 - ŷ + 1e-8) + return LogisticRegressionModel(meta, counters, X, y, h, ℓ) +end + +function NLPModels.obj(nlp :: LogisticRegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + xi = @view nlp.X[i,:] + ŷ = 1 / (1 + exp(-dot(xi, β))) + yi = nlp.y[i] + return -yi * log(ŷ + 1e-8) - (1 - yi) * log(1 - ŷ + 1e-8) +end + +function NLPModels.grad!(nlp :: LogisticRegressionModel, i :: Integer, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + xi = @view nlp.X[i,:] + g .= (1 / (1 + exp(-dot(xi, β))) - nlp.y[i]) * xi + return g +end + +function NLPModels.grad!(nlp :: LogisticRegressionModel, J :: AbstractVector{<:Integer}, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + 
for i in J + increment!(nlp, :neval_gradi, i) + end + XJ = @view nlp.X[J,:] + yJ = @view nlp.y[J] + g .= XJ' * (1 ./ (1 .+ exp.(-XJ * β)) .- yJ) + return g +end \ No newline at end of file diff --git a/src/meta.jl b/src/meta.jl index 3ab2696..4855ca7 100644 --- a/src/meta.jl +++ b/src/meta.jl @@ -1,192 +1,192 @@ -export MultObjNLPMeta - -import NLPModels.@rangecheck - -struct MultObjNLPMeta <: AbstractNLPModelMeta - - # A composite type that represents the main features of - # the optimization problem - # - # optimize obj(x) = ∑ᵢ σᵢ objᵢ(x) - # subject to lvar ≤ x ≤ uvar - # lcon ≤ cons(x) ≤ ucon - # - # where x is an nvar-dimensional vector, - # σᵢ are the weights of the objectives, - # objᵢ are each real-valued objective functions, - # cons is the vector-valued constraint function, - # optimize is either "minimize" or "maximize". - # - # Here, lvar, uvar, lcon and ucon are vectors. Some of their - # components may be infinite to indicate that the corresponding - # bound or general constraint is not present. - - nobj :: Int # number of objectives - weights :: Vector # weights of each objective - objtypes :: Vector{Symbol} # types of the objectives (:lin, :quad, :gen) - nnzhi :: Vector{Int} # number of elements needed to store the nonzeros in the sparse Hessian of the i-th objective - - nvar :: Int # number of variables - x0 :: Vector # initial guess - lvar :: Vector # vector of lower bounds - uvar :: Vector # vector of upper bounds - - ifix :: Vector{Int} # indices of fixed variables - ilow :: Vector{Int} # indices of variables with lower bound only - iupp :: Vector{Int} # indices of variables with upper bound only - irng :: Vector{Int} # indices of variables with lower and upper bound (range) - ifree :: Vector{Int} # indices of free variables - iinf :: Vector{Int} # indices of infeasible bounds - - nbv :: Int # number of linear binary variables - niv :: Int # number of linear non-binary integer variables - nlvb :: Int # number of nonlinear variables in both objectives and constraints - nlvo :: Int # number of nonlinear variables in objectives (includes nlvb) - nlvc :: Int # number of nonlinear variables in constraints (includes nlvb) - nlvbi :: Int # number of integer nonlinear variables in both objectives and constraints - nlvci :: Int # number of integer nonlinear variables in constraints only - nlvoi :: Int # number of integer nonlinear variables in objectives only - nwv :: Int # number of linear network (arc) variables - - ncon :: Int # number of general constraints - y0 :: Vector # initial Lagrange multipliers - lcon :: Vector # vector of constraint lower bounds - ucon :: Vector # vector of constraint upper bounds - - jfix :: Vector{Int} # indices of equality constraints - jlow :: Vector{Int} # indices of constraints of the form c(x) ≥ cl - jupp :: Vector{Int} # indices of constraints of the form c(x) ≤ cu - jrng :: Vector{Int} # indices of constraints of the form cl ≤ c(x) ≤ cu - jfree :: Vector{Int} # indices of "free" constraints (there shouldn't be any) - jinf :: Vector{Int} # indices of the visibly infeasible constraints - - nnzo :: Int # number of nonzeros in all objectives gradients - nnzj :: Int # number of elements needed to store the nonzeros in the sparse Jacobian - nnzh :: Int # number of elements needed to store the nonzeros in the sparse Hessian - - nlin :: Int # number of linear constraints - nnln :: Int # number of nonlinear general constraints - nnnet :: Int # number of nonlinear network constraints - nlnet :: Int # number of linear network constraints - - lin :: 
Vector{Int} # indices of linear constraints - nln :: Vector{Int} # indices of nonlinear constraints - nnet :: Vector{Int} # indices of nonlinear network constraints - lnet :: Vector{Int} # indices of linear network constraints - - minimize :: Bool # true if optimize == minimize - nlo :: Int # number of nonlinear objectives - islp :: Bool # true if the problem is a linear program - name :: String # problem name - - function MultObjNLPMeta(nvar, nobj; - x0=zeros(nvar,), - lvar=-Inf * ones(nvar,), - uvar=Inf * ones(nvar,), - weights=ones(nobj), - objtypes=fill(:gen, nobj), - nnzhi=fill(div(nvar * (nvar + 1), 2), nobj), - nbv=0, - niv=0, - nlvb=nvar, - nlvo=nvar, - nlvc=nvar, - nlvbi=0, - nlvci=0, - nlvoi=0, - nwv=0, - ncon=0, - y0=zeros(ncon,), - lcon=-Inf * ones(ncon,), - ucon=Inf * ones(ncon,), - nnzo=nvar, - nnzj=nvar * ncon, - nnzh=nvar * (nvar + 1) / 2, - lin=Int[], - nln=1:ncon, - nnet=Int[], - lnet=Int[], - nlin=length(lin), - nnln=length(nln), - nnnet=length(nnet), - nlnet=length(lnet), - minimize=true, - nlo=1, - islp=false, - name="Generic") - if (nvar < 1) || (ncon < 0) - error("Nonsensical dimensions") - end - - @lencheck nobj weights objtypes nnzhi - @lencheck nvar x0 lvar uvar - @lencheck ncon y0 lcon ucon - @lencheck nlin lin - @lencheck nnln nln - @lencheck nnnet nnet - @lencheck nlnet lnet - @rangecheck 1 ncon lin nln nnet lnet - if !(unique(objtypes) ⊆ [:lin, :quad, :gen]) - error("The objective types should be chosen from the set [:lin, :quad, :gen]") - end - nnzhi .= max.(0, nnzhi) - - ifix = findall(lvar .== uvar) - ilow = findall((lvar .> -Inf) .& (uvar .== Inf)) - iupp = findall((lvar .== -Inf) .& (uvar .< Inf)) - irng = findall((lvar .> -Inf) .& (uvar .< Inf) .& (lvar .< uvar)) - ifree = findall((lvar .== -Inf) .& (uvar .== Inf)) - iinf = findall(lvar .> uvar) - - jfix = findall(lcon .== ucon) - jlow = findall((lcon .> -Inf) .& (ucon .== Inf)) - jupp = findall((lcon .== -Inf) .& (ucon .< Inf)) - jrng = findall((lcon .> -Inf) .& (ucon .< Inf) .& (lcon .< ucon)) - jfree = findall((lcon .== -Inf) .& (ucon .== Inf)) - jinf = findall(lcon .> ucon) - - nnzj = max(0, nnzj) - nnzh = max(0, nnzh) - - new(nobj, weights, objtypes, nnzhi, - nvar, x0, lvar, uvar, - ifix, ilow, iupp, irng, ifree, iinf, - nbv, niv, nlvb, nlvo, nlvc, - nlvbi, nlvci, nlvoi, nwv, - ncon, y0, lcon, ucon, - jfix, jlow, jupp, jrng, jfree, jinf, - nnzo, nnzj, nnzh, - nlin, nnln, nnnet, nlnet, lin, nln, nnet, lnet, - minimize, nlo, islp, name) - end -end - -import NLPModels: histline, lines_of_hist, sparsityline - -function lines_of_description(m :: MultObjNLPMeta) - V = [length(m.ifree), length(m.ilow), length(m.iupp), length(m.irng), length(m.ifix), length(m.iinf)] - V = [sum(V); V] - S = ["All variables", "free", "lower", "upper", "low/upp", "fixed", "infeas"] - varlines = lines_of_hist(S, V) - push!(varlines, sparsityline("nnzh", m.nnzh, m.nvar * (m.nvar + 1) / 2)) - - V = [length(m.jfree), length(m.jlow), length(m.jupp), length(m.jrng), length(m.jfix), length(m.jinf)] - V = [sum(V); V] - S = ["All constraints", "free", "lower", "upper", "low/upp", "fixed", "infeas"] - conlines = lines_of_hist(S, V) - push!(conlines, histline("linear", m.nlin, m.ncon), histline("nonlinear", m.nnln, m.ncon)) - push!(conlines, sparsityline("nnzj", m.nnzj, m.nvar * m.ncon)) - - append!(varlines, repeat([" "^length(varlines[1])], length(conlines) - length(varlines))) - lines = [" TODO: Improve this"; varlines .* conlines] - - return lines -end - -import Base.show -function show(io :: IO, m :: MultObjNLPMeta) - println(io, " 
Problem name: $(m.name)") - println(io, " Number of objectives: $(m.nobj)") - lines = lines_of_description(m) - println(io, join(lines, "\n") * "\n") +export MultObjNLPMeta + +import NLPModels.@rangecheck + +struct MultObjNLPMeta <: AbstractNLPModelMeta + + # A composite type that represents the main features of + # the optimization problem + # + # optimize obj(x) = ∑ᵢ σᵢ objᵢ(x) + # subject to lvar ≤ x ≤ uvar + # lcon ≤ cons(x) ≤ ucon + # + # where x is an nvar-dimensional vector, + # σᵢ are the weights of the objectives, + # objᵢ are each real-valued objective functions, + # cons is the vector-valued constraint function, + # optimize is either "minimize" or "maximize". + # + # Here, lvar, uvar, lcon and ucon are vectors. Some of their + # components may be infinite to indicate that the corresponding + # bound or general constraint is not present. + + nobj :: Int # number of objectives + weights :: Vector # weights of each objective + objtypes :: Vector{Symbol} # types of the objectives (:lin, :quad, :gen) + nnzhi :: Vector{Int} # number of elements needed to store the nonzeros in the sparse Hessian of the i-th objective + + nvar :: Int # number of variables + x0 :: Vector # initial guess + lvar :: Vector # vector of lower bounds + uvar :: Vector # vector of upper bounds + + ifix :: Vector{Int} # indices of fixed variables + ilow :: Vector{Int} # indices of variables with lower bound only + iupp :: Vector{Int} # indices of variables with upper bound only + irng :: Vector{Int} # indices of variables with lower and upper bound (range) + ifree :: Vector{Int} # indices of free variables + iinf :: Vector{Int} # indices of infeasible bounds + + nbv :: Int # number of linear binary variables + niv :: Int # number of linear non-binary integer variables + nlvb :: Int # number of nonlinear variables in both objectives and constraints + nlvo :: Int # number of nonlinear variables in objectives (includes nlvb) + nlvc :: Int # number of nonlinear variables in constraints (includes nlvb) + nlvbi :: Int # number of integer nonlinear variables in both objectives and constraints + nlvci :: Int # number of integer nonlinear variables in constraints only + nlvoi :: Int # number of integer nonlinear variables in objectives only + nwv :: Int # number of linear network (arc) variables + + ncon :: Int # number of general constraints + y0 :: Vector # initial Lagrange multipliers + lcon :: Vector # vector of constraint lower bounds + ucon :: Vector # vector of constraint upper bounds + + jfix :: Vector{Int} # indices of equality constraints + jlow :: Vector{Int} # indices of constraints of the form c(x) ≥ cl + jupp :: Vector{Int} # indices of constraints of the form c(x) ≤ cu + jrng :: Vector{Int} # indices of constraints of the form cl ≤ c(x) ≤ cu + jfree :: Vector{Int} # indices of "free" constraints (there shouldn't be any) + jinf :: Vector{Int} # indices of the visibly infeasible constraints + + nnzo :: Int # number of nonzeros in all objectives gradients + nnzj :: Int # number of elements needed to store the nonzeros in the sparse Jacobian + nnzh :: Int # number of elements needed to store the nonzeros in the sparse Hessian + + nlin :: Int # number of linear constraints + nnln :: Int # number of nonlinear general constraints + nnnet :: Int # number of nonlinear network constraints + nlnet :: Int # number of linear network constraints + + lin :: Vector{Int} # indices of linear constraints + nln :: Vector{Int} # indices of nonlinear constraints + nnet :: Vector{Int} # indices of nonlinear network constraints + 
lnet :: Vector{Int} # indices of linear network constraints + + minimize :: Bool # true if optimize == minimize + nlo :: Int # number of nonlinear objectives + islp :: Bool # true if the problem is a linear program + name :: String # problem name + + function MultObjNLPMeta(nvar, nobj; + x0=zeros(nvar,), + lvar=-Inf * ones(nvar,), + uvar=Inf * ones(nvar,), + weights=ones(nobj), + objtypes=fill(:gen, nobj), + nnzhi=fill(div(nvar * (nvar + 1), 2), nobj), + nbv=0, + niv=0, + nlvb=nvar, + nlvo=nvar, + nlvc=nvar, + nlvbi=0, + nlvci=0, + nlvoi=0, + nwv=0, + ncon=0, + y0=zeros(ncon,), + lcon=-Inf * ones(ncon,), + ucon=Inf * ones(ncon,), + nnzo=nvar, + nnzj=nvar * ncon, + nnzh=nvar * (nvar + 1) / 2, + lin=Int[], + nln=1:ncon, + nnet=Int[], + lnet=Int[], + nlin=length(lin), + nnln=length(nln), + nnnet=length(nnet), + nlnet=length(lnet), + minimize=true, + nlo=1, + islp=false, + name="Generic") + if (nvar < 1) || (ncon < 0) + error("Nonsensical dimensions") + end + + @lencheck nobj weights objtypes nnzhi + @lencheck nvar x0 lvar uvar + @lencheck ncon y0 lcon ucon + @lencheck nlin lin + @lencheck nnln nln + @lencheck nnnet nnet + @lencheck nlnet lnet + @rangecheck 1 ncon lin nln nnet lnet + if !(unique(objtypes) ⊆ [:lin, :quad, :gen]) + error("The objective types should be chosen from the set [:lin, :quad, :gen]") + end + nnzhi .= max.(0, nnzhi) + + ifix = findall(lvar .== uvar) + ilow = findall((lvar .> -Inf) .& (uvar .== Inf)) + iupp = findall((lvar .== -Inf) .& (uvar .< Inf)) + irng = findall((lvar .> -Inf) .& (uvar .< Inf) .& (lvar .< uvar)) + ifree = findall((lvar .== -Inf) .& (uvar .== Inf)) + iinf = findall(lvar .> uvar) + + jfix = findall(lcon .== ucon) + jlow = findall((lcon .> -Inf) .& (ucon .== Inf)) + jupp = findall((lcon .== -Inf) .& (ucon .< Inf)) + jrng = findall((lcon .> -Inf) .& (ucon .< Inf) .& (lcon .< ucon)) + jfree = findall((lcon .== -Inf) .& (ucon .== Inf)) + jinf = findall(lcon .> ucon) + + nnzj = max(0, nnzj) + nnzh = max(0, nnzh) + + new(nobj, weights, objtypes, nnzhi, + nvar, x0, lvar, uvar, + ifix, ilow, iupp, irng, ifree, iinf, + nbv, niv, nlvb, nlvo, nlvc, + nlvbi, nlvci, nlvoi, nwv, + ncon, y0, lcon, ucon, + jfix, jlow, jupp, jrng, jfree, jinf, + nnzo, nnzj, nnzh, + nlin, nnln, nnnet, nlnet, lin, nln, nnet, lnet, + minimize, nlo, islp, name) + end +end + +import NLPModels: histline, lines_of_hist, sparsityline + +function lines_of_description(m :: MultObjNLPMeta) + V = [length(m.ifree), length(m.ilow), length(m.iupp), length(m.irng), length(m.ifix), length(m.iinf)] + V = [sum(V); V] + S = ["All variables", "free", "lower", "upper", "low/upp", "fixed", "infeas"] + varlines = lines_of_hist(S, V) + push!(varlines, sparsityline("nnzh", m.nnzh, m.nvar * (m.nvar + 1) / 2)) + + V = [length(m.jfree), length(m.jlow), length(m.jupp), length(m.jrng), length(m.jfix), length(m.jinf)] + V = [sum(V); V] + S = ["All constraints", "free", "lower", "upper", "low/upp", "fixed", "infeas"] + conlines = lines_of_hist(S, V) + push!(conlines, histline("linear", m.nlin, m.ncon), histline("nonlinear", m.nnln, m.ncon)) + push!(conlines, sparsityline("nnzj", m.nnzj, m.nvar * m.ncon)) + + append!(varlines, repeat([" "^length(varlines[1])], length(conlines) - length(varlines))) + lines = [" TODO: Improve this"; varlines .* conlines] + + return lines +end + +import Base.show +function show(io :: IO, m :: MultObjNLPMeta) + println(io, " Problem name: $(m.name)") + println(io, " Number of objectives: $(m.nobj)") + lines = lines_of_description(m) + println(io, join(lines, "\n") * "\n") end \ No newline 
at end of file
diff --git a/src/regression_model.jl b/src/regression_model.jl
new file mode 100644
index 0000000..c7f26d8
--- /dev/null
+++ b/src/regression_model.jl
@@ -0,0 +1,52 @@
+export RegressionModel
+
+using ForwardDiff, LinearAlgebra
+
+abstract type AbstractRegressionModel <: AbstractMultObjModel end
+
+include("linear-regression-model.jl")
+include("logistic-regression-model.jl")
+
+mutable struct RegressionModel <: AbstractRegressionModel
+  meta :: MultObjNLPMeta
+  counters :: MultObjCounters
+
+  X :: Matrix
+  y :: Vector
+  h # h(x, w)
+  ℓ # ℓ(y, ŷ)
+end
+
+function RegressionModel(X, y, h, ℓ)
+  n, p = size(X)
+
+  meta = MultObjNLPMeta(p, n)
+  counters = MultObjCounters(n)
+  return RegressionModel(meta, counters, X, y, h, ℓ)
+end
+
+function NLPModels.obj(nlp :: RegressionModel, i :: Integer, β :: AbstractVector)
+  NLPModels.@lencheck nlp.meta.nvar β
+  NLPModels.@rangecheck 1 nlp.meta.nobj i
+  increment!(nlp, :neval_obji, i)
+  return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β))
+end
+
+function NLPModels.grad!(nlp :: RegressionModel, i :: Integer, β :: AbstractVector, g :: AbstractVector)
+  NLPModels.@lencheck nlp.meta.nvar β g
+  NLPModels.@rangecheck 1 nlp.meta.nobj i
+  increment!(nlp, :neval_gradi, i)
+  ForwardDiff.gradient!(g, β -> nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)), β)
+  return g
+end
+
+function NLPModels.grad!(nlp :: RegressionModel, J :: AbstractVector{<: Integer}, β :: AbstractVector, g :: AbstractVector)
+  NLPModels.@lencheck nlp.meta.nvar β g
+  for i in J
+    increment!(nlp, :neval_gradi, i)
+  end
+  ForwardDiff.gradient!(g,
+                        β -> sum(nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) for i in J),
+                        β)
+  return g
+end
\ No newline at end of file
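For illustration (not part of the patch), a least-squares instance of `RegressionModel`; the data and the choices of `h` and `ℓ` below are arbitrary, and derivatives come from ForwardDiff.

```julia
using MultObjNLPModels, NLPModels, LinearAlgebra

X = [1.0 2.0; 3.0 4.0; 5.0 6.0]   # three samples, two features
y = [1.0, 2.0, 3.0]
h(x, β) = dot(x, β)               # prediction function
ℓ(y, ŷ) = (y - ŷ)^2 / 2           # per-sample loss

model = RegressionModel(X, y, h, ℓ)
β = zeros(model.meta.nvar)
NLPModels.obj(model, 1, β)        # loss on sample 1
g = zeros(model.meta.nvar)
grad!(model, [1, 3], β, g)        # batch gradient over samples 1 and 3
```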
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl
new file mode 100644
index 0000000..5ab02b3
--- /dev/null
+++ b/src/sthocastic_gradient.jl
@@ -0,0 +1,117 @@
+using LinearAlgebra, Random, SolverTools
+
+export sthocastic_gradient
+
+function sthocastic_gradient(
+  nlp::AbstractMultObjModel;
+  learning_rate::Symbol = :optimal,
+  α::Float64 = 1e-2,      # penalty strength
+  ρ::Float64 = 0.85,      # elasticnet mixing parameter
+  penalty::Symbol = :l2,
+  max_eval::Int = 0,
+  max_time::Float64 = 60.0,
+  max_iter::Int = 100,
+  atol::Float64 = 1e-8,
+  rtol::Float64 = 1e-8,
+  batch_size::Int = 1,
+  ν::Float64 = 0.5,       # exponent of the :invscaling rate
+  d::Float64 = 0.5,       # decay factor of the time/step/exponential rates
+  r::Int64 = 10,          # step length of the :step_based rate
+  γ0::Float64 = 1.0       # initial step size
+)
+
+  iter = 0
+  start_time = time()
+  β = nlp.meta.x0
+  n = nlp.meta.nobj
+  βavg = similar(β)
+  g = similar(β)
+
+  Δt = time() - start_time
+
+  P = if penalty == :l2
+    β -> β
+  elseif penalty == :l1
+    β -> sign.(β)
+  elseif penalty == :elasticnet
+    β -> ρ * β + (1 - ρ) * sign.(β)
+  else
+    error("unknown penalty :$penalty")
+  end
+
+  if learning_rate == :optimal && α == 0.0
+    @warn("Can't use learning_rate = :optimal with α = 0.0; switching to :step_based.")
+    learning_rate = :step_based
+  end
+
+  γ = if learning_rate == :optimal
+    iter -> γ0 / (α * (1e3 + iter))
+  elseif learning_rate == :invscaling
+    iter -> γ0 / (iter + 1) ^ ν
+  elseif learning_rate == :time_based
+    iter -> γ0 / (1 + d * iter)
+  elseif learning_rate == :step_based
+    iter -> γ0 * d ^ div(1 + iter, r)
+  elseif learning_rate == :exponential
+    iter -> γ0 / exp(d * iter)
+  else
+    error("unknown learning_rate :$learning_rate")
+  end
+
+  status = :unknown
+  tired = Δt > max_time || sum_counters(nlp) > max_eval > 0 || iter > max_iter
+  small_step = γ(iter) < 1e-6
+
+  βavg .= 0
+  num_batches = ceil(Int, n / batch_size)
+
+  while !(small_step || tired)
+    Rindex = shuffle(1:n)
+    for l = 1:num_batches
+      index = if l < num_batches
+        batch_size * (l - 1) .+ (1:batch_size)
+      else
+        (batch_size * (l - 1) + 1):n
+      end
+
+      grad!(nlp, Rindex[index], β, g)
+      β -= γ(iter) * (α * P(β) + g / length(index))
+      βavg += β
+    end
+
+    βavg = βavg / num_batches
+    Δt = time() - start_time
+    iter += 1
+    tired = Δt > max_time || sum_counters(nlp) > max_eval > 0 || iter > max_iter
+    small_step = γ(iter) < 1e-6
+  end
+
+  status = if small_step
+    :small_step
+  elseif tired
+    if Δt > max_time
+      :max_time
+    elseif sum_counters(nlp) > max_eval > 0
+      :max_eval
+    elseif iter > max_iter
+      :max_iter
+    end
+  end
+
+  return GenericExecutionStats(status, nlp;
+                               solution=β,
+                               solver_specific=Dict(:βavg => βavg),
+                               elapsed_time=Δt,
+                               iter=iter
+                              )
+end
\ No newline at end of file
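For illustration (not part of the patch), an end-to-end sketch fitting a `LinearRegressionModel` with the solver above; the data and solver options are arbitrary.

```julia
using MultObjNLPModels, Random

Random.seed!(0)
X = randn(200, 5)
y = X * [1.0, -2.0, 0.5, 0.0, 3.0] .+ 0.01 .* randn(200)

model = LinearRegressionModel(X, y)
stats = sthocastic_gradient(model, learning_rate = :invscaling,
                            batch_size = 20, max_iter = 500, γ0 = 0.1)
stats.solution                  # final iterate β
stats.solver_specific[:βavg]    # averaged iterate
```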