From f45bea3d3098857ca7ef39ba61e9bdba745ea767 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?=
Date: Wed, 2 Sep 2020 14:07:03 -0300
Subject: [PATCH 01/21] Split the files: one for the models and another for the functions
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
--- .vscode/settings.json | 3 + example.jl | 2 +- exemplosrogerio.jl | 38 ++++++++++++ src/MultObjNLPModels.jl | 2 + src/gradestoc.jl | 131 ++++++++++++++++++++++++++++++++++++++++ src/regressaomodel.jl | 33 ++++++++++ 6 files changed, 208 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json create mode 100644 exemplosrogerio.jl create mode 100644 src/gradestoc.jl create mode 100644 src/regressaomodel.jl
diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..e71bc36 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "c:\\Users\\Acer\\Desktop\\roger\\TCC\\MultObjNLPModels.jl" +} \ No newline at end of file
diff --git a/example.jl b/example.jl index 8a7dd7b..3f16f15 100644 --- a/example.jl +++ b/example.jl @@ -54,4 +54,4 @@ function NLPModels.hprod!(nlp :: ExampleMONLP, i :: Integer, x :: AbstractVector Hv[2] = -400 * x[1] * v[1] + 200 * v[2] end return Hv -end \ No newline at end of file +end
diff --git a/exemplosrogerio.jl b/exemplosrogerio.jl new file mode 100644 index 0000000..63b75fd --- /dev/null +++ b/exemplosrogerio.jl @@ -0,0 +1,38 @@ +using NLPModels, MultObjNLPModels, Random + +# LINEAR REGRESSION +# h(β, x) = dot(β, x) +# ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 +# modelo = RegressaoModel(X, y, h, ℓ) + +n,p = [10,2] + +X = rand(n,p) +y = [sum(X[i,:]).+randn()*0.5 for i=1:n] + +println("y=$y") +output = MultObjNLPModels.RegressaoLinearModel(X,y) + +println() +# println(output) +println() +println(grad_desc(output)) + +#################################################################### + +# LOGISTIC REGRESSION + +# n = 50 +# β = ones(2) +# xl = sort(rand(n) * 2 .- 1) +# yl = [x + 0.2 + randn() * 0.25 > 0 ? 1 : 0 for x in xl] +# xl = hcat(ones(n), xl) +# println() +# println("y",yl) + +# pred = MultObjNLPModels.RegressaoLogisticaModel(xl,yl) + +# println(pred) +# println() +# println(grad_estoc(pred)) +
diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index 20bdc52..6c13c6f 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -6,4 +6,6 @@ include("meta.jl") include("counters.jl") include("api.jl") +include("gradestoc.jl") +include("regressaomodel.jl") end # module
diff --git a/src/gradestoc.jl b/src/gradestoc.jl new file mode 100644 index 0000000..9f09af6 --- /dev/null +++ b/src/gradestoc.jl @@ -0,0 +1,131 @@ +export grad_desc, grad_estoc, MultObjNLPModels.obj, MultObjNLPModels.grad!
+ + +function grad_desc(nlp::RegressaoModel; + γ = 1e-2, + max_eval=100000, + max_time = 60.0, + atol = 1e-10, + rtol = 1e-10 + ) + + start_time = time() + iter = 0 + β = nlp.meta.x0 + ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # review + f = nlp.ℓ(nlp.y, nlp.h(nlp.X,β)) + N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) + + ϵt = atol + rtol*N + Δt = time() - start_time + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + + @info log_header([:iter, :f], + [Int, Float64], + hdr_override=Dict(:f => "f(β)")) + + @info log_row(Any[iter, f]) + + while !(solved || tired) + β -= γ * ∇ℓ(X,y,β) + + Δt = time() - start_time + N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + iter+=1 + end + + status = if solved + :first_order + elseif tired + if Δt >= max_time + :max_time + else + :max_eval + end + else + :unknown + end + + # return GenericExecutionStats(status, nlp; + # solution = β, + # objective=f, + # elapsed_time=Δt, + # iter=iter) + return X*β + end + + +function grad_estoc(nlp::RegressaoModel; + γ = 1e-2, + max_eval=100000, + max_time = 60.0, + atol = 1e-10, + rtol = 1e-10 + ) + + start_time = time() + iter = 0 + β = nlp.meta.x0 + n = nlp.meta.nobj + + # f = nlp.ℓ(y, nlp.h(X,β)) + # N = norm(nlp.ℓ(y, nlp.h(X,β))) + + ϵt = atol + rtol*N + Δt = time() - start_time + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + + @info log_header([:iter, :f], + [Int, Float64], + hdr_override=Dict(:f => "f(β)")) + + @info log_row(Any[iter, f]) + + # while !(solved || tired) + for k = 1:10n + for i in shuffle(1:n) + β -= γ * MultObjNLPModels.grad!(nlp, i, β) + end + end + + status = if solved + :first_order + elseif tired + if Δt >= max_time + :max_time + else + :max_eval + end + else + :unknown + end + + # return GenericExecutionStats(status, nlp; + # solution = β, + # objective=f, + # elapsed_time=Δt, + # iter=iter) + + y_pred = [nlp.h(nlp.X[i,:],β) > 0.5 ?
1.0 : 0.0 for i=1:n ] + return y_pred + end + + + + function MultObjNLPModels.obj(nlp :: RegressaoModel, i :: Integer, β :: AbstractVector) + # @lencheck something + return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β )) # ℓ( yᵢ, h(xᵢᵀβ) ) + end + + function MultObjNLPModels.grad!(nlp :: RegressaoModel, i :: Integer, x :: AbstractVector #= , g :: AbstractVector =#) + # NLPModels.@lencheck 2 x g + # NLPModels.@rangecheck 1 2 i + # increment!(nlp, :neval_gradi, i) + ∇f(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) + return ∇f(nlp.X[i,:], nlp.y[i], x) + end + \ No newline at end of file
diff --git a/src/regressaomodel.jl b/src/regressaomodel.jl new file mode 100644 index 0000000..77c45af --- /dev/null +++ b/src/regressaomodel.jl @@ -0,0 +1,33 @@ +export RegressaoModel, RegressaoLinearModel, RegressaoLogisticaModel + +mutable struct RegressaoModel <: AbstractMultObjModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) + end + +function RegressaoModel(X, y, h, ℓ) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + return RegressaoModel(meta, counters, X, y, h, ℓ) + end + + function RegressaoLinearModel(X, y) + h(x, β) = x*β + ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 + return RegressaoModel(X, y, h, ℓ) + end + + function RegressaoLogisticaModel(X, y) + h(x, β) = 1 / (1 + exp(-x'*β)) + ℓ(y, ŷ) = -y * log(ŷ) - (1 - y) * log(1 - ŷ) + return RegressaoModel(X, y, h, ℓ) + end + + \ No newline at end of file
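The model that patch 01 introduces stores the data (X, y), a prediction function h, and a per-sample loss ℓ, so the i-th objective is fᵢ(β) = ℓ(yᵢ, h(xᵢ, β)) and the total objective is f(β) = ∑ᵢ σᵢ fᵢ(β). A minimal self-contained sketch of that decomposition, independent of the package types (all names below are illustrative, not the package API):

    using LinearAlgebra

    n, p = 10, 2
    X = rand(n, p)
    y = X * ones(p) .+ 0.1 .* randn(n)

    h(x, β) = dot(x, β)          # linear predictor for one sample
    ℓ(yi, ŷ) = (yi - ŷ)^2 / 2    # squared-error loss for one sample

    # i-th objective: fᵢ(β) = ℓ(yᵢ, h(xᵢ, β))
    f_i(i, β) = ℓ(y[i], h(X[i, :], β))

    # total objective with unit weights σᵢ = 1: f(β) = ∑ᵢ σᵢ fᵢ(β)
    f(β) = sum(f_i(i, β) for i = 1:n)

    println(f(zeros(p)))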
From 1b16ecb28cc6e61fb3f05756803471375f3aa257 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?=
Date: Thu, 3 Sep 2020 18:48:28 -0300
Subject: [PATCH 02/21] Now in English, but still trying to get it running to fix the methods.
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
--- .vscode/settings.json | 3 - example.jl | 2 +- exemplosrogerio.jl | 59 ++++++++++++++++- src/MultObjNLPModels.jl | 6 +- src/gradestoc.jl | 131 ------------------------------------- src/regressaomodel.jl | 33 ---------- src/regression_model.jl | 33 ++++++++++ src/sthocastic_gradient.jl | 77 ++++++++++++++++++++++ 8 files changed, 173 insertions(+), 171 deletions(-) delete mode 100644 .vscode/settings.json delete mode 100644 src/gradestoc.jl delete mode 100644 src/regressaomodel.jl create mode 100644 src/regression_model.jl create mode 100644 src/sthocastic_gradient.jl
diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index e71bc36..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "julia.environmentPath": "c:\\Users\\Acer\\Desktop\\roger\\TCC\\MultObjNLPModels.jl" -} \ No newline at end of file
diff --git a/example.jl b/example.jl index 3f16f15..64f0885 100644 --- a/example.jl +++ b/example.jl @@ -54,4 +54,4 @@ function NLPModels.hprod!(nlp :: ExampleMONLP, i :: Integer, x :: AbstractVector Hv[2] = -400 * x[1] * v[1] + 200 * v[2] end return Hv -end +
diff --git a/exemplosrogerio.jl b/exemplosrogerio.jl index 63b75fd..4b900d7 100644 --- a/exemplosrogerio.jl +++ b/exemplosrogerio.jl @@ -1,5 +1,62 @@ using NLPModels, MultObjNLPModels, Random +function grad_desc(nlp::RegressaoModel; + γ = 1e-2, + max_eval=100000, + max_time = 60.0, + atol = 1e-10, + rtol = 1e-10 + ) + + start_time = time() + iter = 0 + β = nlp.meta.x0 + ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # fix for the new grad + f = nlp.ℓ(nlp.y, nlp.h(nlp.X,β)) + N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) + + ϵt = atol + rtol*N + Δt = time() - start_time + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + + @info log_header([:iter, :f], + [Int, Float64], + hdr_override=Dict(:f => "f(β)")) + + @info log_row(Any[iter, f]) + + while !(solved || tired) + β -= γ * ∇ℓ(X,y,β) + + Δt = time() - start_time + N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + iter+=1 + end + + status = if solved + :first_order + elseif tired + if Δt >= max_time + :max_time + else + :max_eval + end + else + :unknown + end + + # return GenericExecutionStats(status, nlp; + # solution = β, + # objective=f, + # elapsed_time=Δt, + # iter=iter) + return X*β + end + + # LINEAR REGRESSION # h(β, x) = dot(β, x) # ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 # modelo = RegressaoModel(X, y, h, ℓ) n,p = [10,2] X = rand(n,p) y = [sum(X[i,:]).+randn()*0.5 for i=1:n] println("y=$y") -output = MultObjNLPModels.RegressaoLinearModel(X,y) +output = MultObjNLPModels.LinearRegressionModel(X,y) println() # println(output) println() println(grad_desc(output))
diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index 6c13c6f..a70b0b7 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -1,11 +1,13 @@ module MultObjNLPModels +export RegressionModel, LinearRegressionModel, LogisticRegressionModel # I saw this being done in NLPModels, but I still couldn't get it to work + using NLPModels include("meta.jl") include("counters.jl") include("api.jl") -include("gradestoc.jl") -include("regressaomodel.jl") +include("sthocastic_gradient.jl") +include("regression_model.jl") end # module
diff --git a/src/gradestoc.jl b/src/gradestoc.jl deleted file mode 100644 index 9f09af6..0000000 --- a/src/gradestoc.jl +++ /dev/null @@ -1,131 +0,0 @@ -export grad_desc, grad_estoc, MultObjNLPModels.obj, MultObjNLPModels.grad!
- - -function grad_desc(nlp::RegressaoModel; - γ = 1e-2, - max_eval=100000, - max_time = 60.0, - atol = 1e-10, - rtol = 1e-10 - ) - - start_time = time() - iter = 0 - β = nlp.meta.x0 - ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # review - f = nlp.ℓ(nlp.y, nlp.h(nlp.X,β)) - N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) - - ϵt = atol + rtol*N - Δt = time() - start_time - solved = N < ϵt - tired = (Δt > max_time) || (iter > max_eval) - - @info log_header([:iter, :f], - [Int, Float64], - hdr_override=Dict(:f => "f(β)")) - - @info log_row(Any[iter, f]) - - while !(solved || tired) - β -= γ * ∇ℓ(X,y,β) - - Δt = time() - start_time - N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) - solved = N < ϵt - tired = (Δt > max_time) || (iter > max_eval) - iter+=1 - end - - status = if solved - :first_order - elseif tired - if Δt >= max_time - :max_time - else - :max_eval - end - else - :unknown - end - - # return GenericExecutionStats(status, nlp; - # solution = β, - # objective=f, - # elapsed_time=Δt, - # iter=iter) - return X*β - end - - -function grad_estoc(nlp::RegressaoModel; - γ = 1e-2, - max_eval=100000, - max_time = 60.0, - atol = 1e-10, - rtol = 1e-10 - ) - - start_time = time() - iter = 0 - β = nlp.meta.x0 - n = nlp.meta.nobj - - # f = nlp.ℓ(y, nlp.h(X,β)) - # N = norm(nlp.ℓ(y, nlp.h(X,β))) - - ϵt = atol + rtol*N - Δt = time() - start_time - solved = N < ϵt - tired = (Δt > max_time) || (iter > max_eval) - - @info log_header([:iter, :f], - [Int, Float64], - hdr_override=Dict(:f => "f(β)")) - - @info log_row(Any[iter, f]) - - # while !(solved || tired) - for k = 1:10n - for i in shuffle(1:n) - β -= γ * MultObjNLPModels.grad!(nlp, i, β) - end - end - - status = if solved - :first_order - elseif tired - if Δt >= max_time - :max_time - else - :max_eval - end - else - :unknown - end - - # return GenericExecutionStats(status, nlp; - # solution = β, - # objective=f, - # elapsed_time=Δt, - # iter=iter) - - y_pred = [nlp.h(nlp.X[i,:],β) > 0.5 ?
1.0 : 0.0 for i=1:n ] - return y_pred - end - - - - function MultObjNLPModels.obj(nlp :: RegressaoModel, i :: Integer, β :: AbstractVector) - # @lencheck something - return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β )) # ℓ( yᵢ, h(xᵢᵀβ) ) - end - - function MultObjNLPModels.grad!(nlp :: RegressaoModel, i :: Integer, x :: AbstractVector #= , g :: AbstractVector =#) - # NLPModels.@lencheck 2 x g - # NLPModels.@rangecheck 1 2 i - # increment!(nlp, :neval_gradi, i) - ∇f(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) - return ∇f(nlp.X[i,:], nlp.y[i], x) - end - \ No newline at end of file
diff --git a/src/regressaomodel.jl b/src/regressaomodel.jl deleted file mode 100644 index 77c45af..0000000 --- a/src/regressaomodel.jl +++ /dev/null @@ -1,33 +0,0 @@ -export RegressaoModel, RegressaoLinearModel, RegressaoLogisticaModel - -mutable struct RegressaoModel <: AbstractMultObjModel - meta :: MultObjNLPMeta - counters :: MultObjCounters - - X :: Matrix - y :: Vector - h # h(x, w) - ℓ # ℓ(y, ŷ) - end - -function RegressaoModel(X, y, h, ℓ) - n, p = size(X) - - meta = MultObjNLPMeta(p, n) - counters = MultObjCounters(n) - return RegressaoModel(meta, counters, X, y, h, ℓ) - end - - function RegressaoLinearModel(X, y) - h(x, β) = x*β - ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 - return RegressaoModel(X, y, h, ℓ) - end - - function RegressaoLogisticaModel(X, y) - h(x, β) = 1 / (1 + exp(-x'*β)) - ℓ(y, ŷ) = -y * log(ŷ) - (1 - y) * log(1 - ŷ) - return RegressaoModel(X, y, h, ℓ) - end - - \ No newline at end of file
diff --git a/src/regression_model.jl b/src/regression_model.jl new file mode 100644 index 0000000..c9f41a2 --- /dev/null +++ b/src/regression_model.jl @@ -0,0 +1,33 @@ +export RegressionModel, LinearRegressionModel, LogisticRegressionModel + +mutable struct RegressionModel <: AbstractMultObjModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function RegressionModel(X, y, h, ℓ) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + return RegressionModel(meta, counters, X, y, h, ℓ) +end + +function LinearRegressionModel(X, y) + h(x, β) = dot(x,β) + ℓ(y, ŷ) = (y - ŷ)^2 / 2 + return RegressionModel(X, y, h, ℓ) +end + +function LogisticRegressionModel(X, y) + h(x, β) = 1 / (1 + exp(-dot(x,β))) + ℓ(y, ŷ) = -y * log(ŷ) - (1 - y) * log(1 - ŷ) + return RegressionModel(X, y, h, ℓ) +end + + \ No newline at end of file
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl new file mode 100644 index 0000000..38c9470 --- /dev/null +++ b/src/sthocastic_gradient.jl @@ -0,0 +1,77 @@ +export sthocastic_gradient , MultObjNLPModels.obj, MultObjNLPModels.grad!
+ + +function sthocastic_gradient( + nlp::RegressionModel; + γ = 1e-2, + max_eval=10000, + max_time = 60.0, + atol = 1e-8, + rtol = 1e-8 + ) + + start_time = time() + iter = 0 + β = nlp.meta.x0 + n = nlp.meta.nobj + + # f = nlp.ℓ(y, nlp.h(X,β)) + # N = norm(nlp.ℓ(y, nlp.h(X,β))) + + ϵt = atol + rtol*N + Δt = time() - start_time + solved = N < ϵt + tired = (Δt > max_time) || (iter > max_eval) + + @info log_header([:iter, :f], + [Int, Float64], + hdr_override=Dict(:f => "f(β)")) + + @info log_row(Any[iter, f]) + + # while !(solved || tired) + for k = 1:10n + for i in shuffle(1:n) + β -= γ * MultObjNLPModels.grad!(nlp, i, β, g) # I need to fix this g + end + end + + status = if solved + :first_order + elseif tired + if Δt >= max_time + :max_time + else + :max_eval + end + else + :unknown + end + + # return GenericExecutionStats(status, nlp; # I haven't fixed this part yet because it's not running + # solution = β, + # objective=f, + # elapsed_time=Δt, + # iter=iter) + + # y_pred = [nlp.h(nlp.X[i,:],β) > 0.5 ? 1.0 : 0.0 for i=1:n ] + return y_pred +end + + + +function MultObjNLPModels.obj(nlp :: RegressaoModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar x + #NLPModels.@rangecheck 1 nlp.meta.nvar i # still need to figure out what the "1" is + increment!(nlp, :neval_obji, i) + return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β )) # ℓ( yᵢ, h(xᵢᵀβ) ) +end + + function MultObjNLPModels.grad!(nlp :: RegressaoModel, i :: Integer, x :: AbstractVector , g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar x g + #NLPModels.@rangecheck 1 nlp.meta.nvar i # still need to figure out what the "1" is + increment!(nlp, :neval_gradi, i) + ∇f(x,y,β) = ForwardDiff.gradient!(g, β->nlp.ℓ(y, nlp.h(x,β)), β) # still need to fix this g + #return ∇f(nlp.X[i,:], nlp.y[i], x) + return g +end
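The obj/grad! methods in the patch above differentiate the closure β -> ℓ(yᵢ, h(xᵢ, β)) with ForwardDiff to get the per-sample gradient. A standalone sketch of that mechanism, with illustrative data and the same in-place pattern the patch is working toward (names here are not the package API):

    using ForwardDiff, LinearAlgebra

    X = rand(5, 2); y = rand(5)
    h(x, β) = dot(x, β)
    ℓ(yi, ŷ) = (yi - ŷ)^2 / 2

    # allocating per-sample gradient: ∇fᵢ(β) = ∇_β ℓ(yᵢ, h(xᵢ, β))
    grad_i(i, β) = ForwardDiff.gradient(b -> ℓ(y[i], h(X[i, :], b)), β)

    # in-place variant, writing into a preallocated g
    function grad_i!(i, β, g)
        ForwardDiff.gradient!(g, b -> ℓ(y[i], h(X[i, :], b)), β)
        return g
    end

    β = zeros(2); g = similar(β)
    println(grad_i(1, β) ≈ grad_i!(1, β, g))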
From 54913b8481840af85b0ad7c9f355966f47a4e00a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?=
Date: Mon, 7 Sep 2020 18:23:26 -0300
Subject: [PATCH 03/21] [WIP] Fix sthocastic gradient
--- .gitignore | 4 ++ example.jl | 1 + exemplosrogerio.jl | 80 +++++++++++++++++----------------- src/MultObjNLPModels.jl | 4 +- src/regression_model.jl | 47 ++++++++++++++++---- src/sthocastic_gradient.jl | 88 ++++++++++++++++---------------------- 6 files changed, 123 insertions(+), 101 deletions(-) create mode 100644 .gitignore
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..306b813 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ + +Manifest.toml + +.vscode/
diff --git a/example.jl b/example.jl index 64f0885..589f450 100644 --- a/example.jl +++ b/example.jl @@ -54,4 +54,5 @@ function NLPModels.hprod!(nlp :: ExampleMONLP, i :: Integer, x :: AbstractVector Hv[2] = -400 * x[1] * v[1] + 200 * v[2] end return Hv +end
diff --git a/exemplosrogerio.jl b/exemplosrogerio.jl index 4b900d7..ddd8583 100644 --- a/exemplosrogerio.jl +++ b/exemplosrogerio.jl @@ -1,41 +1,44 @@ -using NLPModels, MultObjNLPModels, Random +using NLPModels, MultObjNLPModels, Random, LinearAlgebra -function grad_desc(nlp::RegressaoModel; +function grad_desc(nlp::AbstractNLPModel; γ = 1e-2, max_eval=100000, max_time = 60.0, - atol = 1e-10, - rtol = 1e-10 + atol = 1e-6, + rtol = 1e-6 ) - + start_time = time() iter = 0 β = nlp.meta.x0 - ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # fix for the new grad + ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # plain nlp f = nlp.ℓ(nlp.y, nlp.h(nlp.X,β)) N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) - + ϵt = atol + rtol*N Δt = time() - start_time + solved = N < ϵt tired = (Δt > max_time) || (iter > max_eval) @info log_header([:iter, :f], [Int, Float64], hdr_override=Dict(:f => "f(β)")) @info log_row(Any[iter, f]) while !(solved || tired) + # for i=1:nlp.meta.nvar + # β[i] -= γ * MultObjNLPModels.grad!(nlp, i, β) + # end β -= γ * ∇ℓ(X,y,β) Δt = time() - start_time N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) solved = N < ϵt tired = (Δt > max_time) || (iter > max_eval) iter+=1 end status = if solved :first_order elseif tired if Δt >= max_time :max_time else :max_eval end else :unknown end # return GenericExecutionStats(status, nlp; # solution = β, # objective=f, # elapsed_time=Δt, # iter=iter) return X*β end # LINEAR REGRESSION # h(β, x) = dot(β, x) # ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 # modelo = RegressaoModel(X, y, h, ℓ) # n,p = [10,2] # X = rand(n,p) # y = [sum(X[i,:]).+randn()*0.5 for i=1:n] # println("y=$y") # output = LinearRegressionModel(X,y) # println() # println(output) # println() # println(grad_desc(output)) #################################################################### # LOGISTIC REGRESSION n = 50 xl = sort(rand(n) * 2 .- 1) yl = [x + 0.2 + randn() * 0.25 > 0 ?
1 : 0 for x in xl] +xl = hcat(ones(n), xl) +println() +println("y",yl) -# println(pred) -# println() -# println(grad_estoc(pred)) +pred = LogisticRegressionModel(xl,yl) +println(pred) +println() +println(sthocastic_gradient(pred)) diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index a70b0b7..b00f56e 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -8,6 +8,6 @@ include("meta.jl") include("counters.jl") include("api.jl") -include("sthocastic_gradient.jl") include("regression_model.jl") -end # module +include("sthocastic_gradient.jl") +end # module \ No newline at end of file diff --git a/src/regression_model.jl b/src/regression_model.jl index c9f41a2..cc51aff 100644 --- a/src/regression_model.jl +++ b/src/regression_model.jl @@ -1,33 +1,64 @@ export RegressionModel, LinearRegressionModel, LogisticRegressionModel +using ForwardDiff + mutable struct RegressionModel <: AbstractMultObjModel meta :: MultObjNLPMeta counters :: MultObjCounters - + X :: Matrix y :: Vector h # h(x, w) ℓ # ℓ(y, ŷ) end +# function RegressionModel(X, y, f) function RegressionModel(X, y, h, ℓ) n, p = size(X) - + meta = MultObjNLPMeta(p, n) counters = MultObjCounters(n) + # return RegressionModel(meta, counters, X, y, f) return RegressionModel(meta, counters, X, y, h, ℓ) end - + function LinearRegressionModel(X, y) - h(x, β) = dot(x,β) + h(x, β) = dot(x, β) ℓ(y, ŷ) = (y - ŷ)^2 / 2 - return RegressionModel(X, y, h, ℓ) + # f(x, y, β) = ℓ(y, h(x, β)) + # return RegressionModel(X, y, f) + return RegressionModel(X, y, h, ℓ) end - + function LogisticRegressionModel(X, y) - h(x, β) = 1 / (1 + exp(-dot(x,β))) + h(x, β) = 1 / (1 + exp(-dot(x, β))) ℓ(y, ŷ) = -y * log(ŷ) - (1 - y) * log(1 - ŷ) + # f(x, y, β) = ℓ(y, h(x, β)) + # return RegressionModel(X, y, f) return RegressionModel(X, y, h, ℓ) end - \ No newline at end of file +# f(x) = ∑ᵢ σᵢ fᵢ(x) + +function MultObjNLPModels.obj(nlp :: RegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) # ℓ( yᵢ, h(xᵢ, β) ) + # return nlp.f(X[i,:],y[i],β) + # return nlp.obj(nlp.X) + # return nlp.f[i](β) + # return nlp.f(β)[i] +end + + function MultObjNLPModels.grad!(nlp :: RegressionModel, i :: Integer, β :: AbstractVector , g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + ForwardDiff.gradient!(g, β->nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)), β) + # ForwardDiff.gradient!(g, nlp.f(X[i,:]),β) + # ForwardDiff.gradient!(g, nlp.f[i], β) + # ForwardDiff.gradient(nlp.f,β)[i] + + return g +end \ No newline at end of file diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 38c9470..6abd6f8 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -1,77 +1,61 @@ -export sthocastic_gradient , MultObjNLPModels.obj, MultObjNLPModels.grad! 
+using LinearAlgebra, Random, SolverTools export sthocastic_gradient function sthocastic_gradient( - nlp::RegressionModel; + nlp::AbstractMultObjModel; γ = 1e-2, max_eval=10000, max_time = 60.0, atol = 1e-8, rtol = 1e-8 ) start_time = time() iter = 0 β = nlp.meta.x0 n = nlp.meta.nobj g = similar(β) - # f = nlp.ℓ(y, nlp.h(X,β)) - # N = norm(nlp.ℓ(y, nlp.h(X,β))) + # f = nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β)) + # N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) # ϵt = atol + rtol*N Δt = time() - start_time # solved = N < ϵt tired = (Δt > max_time) || (iter > max_eval) # @info log_header([:iter, :f], # [Int, Float64], # hdr_override=Dict(:f => "f(β)")) # @info log_row(Any[iter, f]) for k = 1:30n # I got better results with this number for i in shuffle(1:n) - β -= γ * grad!(nlp, i, β, g) + β -= γ * grad!(nlp, i, β, g) end + + Δt = time() - start_time + iter+=1 + tired = (Δt > max_time) || (iter > max_eval) + end + + status = if (iter == 30n) + :first_order + elseif tired + if Δt >= max_time + :max_time + else + :max_eval + end + else + :unknown + end + + return GenericExecutionStats(status, nlp; + solution = β, + #= objective=f, =# + elapsed_time=Δt, + iter=iter) - # status = if solved - # :first_order - # elseif tired - # if Δt >= max_time - # :max_time - # else - # :max_eval - # end - # else - # :unknown - # end - # return GenericExecutionStats(status, nlp; # I haven't fixed this part yet because it's not running - # solution = β, - # objective=f, - # elapsed_time=Δt, - # iter=iter) - # y_pred = [nlp.h(nlp.X[i,:],β) > 0.5 ?
1.0 : 0.0 for i=1:n ] return β end \ No newline at end of file
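The loop that patch 03 settles on is plain stochastic gradient: each epoch visits all n objectives in a shuffled order and steps against each per-sample gradient with a fixed step γ. A self-contained sketch of that scheme on an illustrative least-squares problem (a closed-form gradient stands in for the package's grad!):

    using Random, LinearAlgebra

    function sgd(X, y; γ = 1e-2, epochs = 100)
        n, p = size(X)
        β = zeros(p)
        for epoch = 1:epochs
            for i in shuffle(1:n)                        # random visiting order
                gi = -(y[i] - dot(X[i, :], β)) * X[i, :] # ∇fᵢ for fᵢ = ½(yᵢ - xᵢᵀβ)²
                β -= γ * gi                              # one stochastic step
            end
        end
        return β
    end

    X = rand(20, 2); y = X * ones(2) .+ 0.05 .* randn(20)
    println(sgd(X, y))   # should approach [1, 1]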
From c61428158381f9bf4e2458c551dab2aba7d08f50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?=
Date: Wed, 9 Sep 2020 19:31:29 -0300
Subject: [PATCH 04/21] [WIP] start with GenericExecutionStats
--- src/MultObjNLPModels.jl | 2 -- src/sthocastic_gradient.jl | 57 ++++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index b00f56e..7cf0633 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -1,7 +1,5 @@ module MultObjNLPModels -export RegressionModel, LinearRegressionModel, LogisticRegressionModel # I saw this being done in NLPModels, but I still couldn't get it to work - using NLPModels include("meta.jl")
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 6abd6f8..f012a54 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -1,5 +1,8 @@ using LinearAlgebra, Random, SolverTools +NLPModels.has_bounds(meta::MultObjNLPMeta) = length(meta.ifree) < meta.nvar +NLPModels.unconstrained(meta::MultObjNLPMeta) = meta.ncon == 0 && !has_bounds(meta) + export sthocastic_gradient function sthocastic_gradient( @@ -17,13 +20,13 @@ function sthocastic_gradient( n = nlp.meta.nobj g = similar(β) - # f = nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β)) - # N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) + # f = nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:],β)) + # N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) # ϵt = atol + rtol*N Δt = time() - start_time # solved = N < ϵt tired = (Δt > max_time) || (iter > max_eval) # @info log_header([:iter, :f], # [Int, Float64], # hdr_override=Dict(:f => "f(β)")) # @info log_row(Any[iter, f]) for k = 1:30n # I got better results with this number for i in shuffle(1:n) β -= γ * grad!(nlp, i, β, g) end Δt = time() - start_time iter+=1 tired = (Δt > max_time) || (iter > max_eval) end status = if (iter == 30n) :first_order elseif tired if Δt >= max_time :max_time else :max_eval end else :unknown end - # y_pred = [nlp.h(nlp.X[i,:],β) > 0.5 ? 1.0 : 0.0 for i=1:n ] - return β + return GenericExecutionStats(status, nlp; + solution = β, + #= objective=f, =# + elapsed_time=Δt, + iter=iter) end \ No newline at end of file
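Patch 05 below organizes the hyperparameters: the step is regularized by α·P(β), where P is the gradient of the penalty term (β for squared ℓ2, sign.(β) as a subgradient for ℓ1, and a ρ-blend for elastic net), and the learning rate follows scikit-learn-style :optimal, :invscaling, or :constant schedules. For reference, a standalone sketch of those two pieces (values and names illustrative):

    # penalty gradients: ∇(½‖β‖²) = β and ∂‖β‖₁ ∋ sign.(β)
    penalty_grad(β; penalty = :l2, ρ = 0.85) =
        penalty == :l2         ? β :
        penalty == :l1         ? sign.(β) :
        penalty == :elasticnet ? ρ * β + (1 - ρ) * sign.(β) :
        error("unknown penalty")

    # learning-rate schedules mirroring the patch
    rate(iter; schedule = :optimal, γ = 1e-2, α = 1e-2, power_t = 1e-2) =
        schedule == :optimal    ? 1 / (α * (1e3 + iter)) :
        schedule == :invscaling ? 1e-2 / (iter + 1)^power_t :
        γ                       # :constant

    println(penalty_grad([0.5, -2.0]; penalty = :elasticnet))
    println(rate(10))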
From 5f95cb2cd49f50cebd38d979ceac2a47de6e7b8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?=
Date: Mon, 5 Oct 2020 18:25:11 -0300
Subject: [PATCH 05/21] [WIP] Hyperparameters organized
--- src/regression_model.jl | 28 +++----------- src/sthocastic_gradient.jl | 77 ++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 45 deletions(-)
diff --git a/src/regression_model.jl b/src/regression_model.jl index cc51aff..8ed6b28 100644 --- a/src/regression_model.jl +++ b/src/regression_model.jl @@ -1,6 +1,6 @@ export RegressionModel, LinearRegressionModel, LogisticRegressionModel -using ForwardDiff +using ForwardDiff, LinearAlgebra mutable struct RegressionModel <: AbstractMultObjModel meta :: MultObjNLPMeta counters :: MultObjCounters X :: Matrix y :: Vector h # h(x, w) ℓ # ℓ(y, ŷ) end function RegressionModel(X, y, h, ℓ) n, p = size(X) meta = MultObjNLPMeta(p, n) counters = MultObjCounters(n) return RegressionModel(meta, counters, X, y, h, ℓ) end function LinearRegressionModel(X, y) h(x, β) = dot(x,β) ℓ(y, ŷ) = (y - ŷ)^2 / 2 return RegressionModel(X, y, h, ℓ) end function LogisticRegressionModel(X, y) h(x, β) = 1 / (1 + exp(-dot(x, β))) - ℓ(y, ŷ) = -y * log(ŷ) - (1 - y) * log(1 - ŷ) + ℓ(y, ŷ) = -y * log(ŷ + 1e-8) - (1 - y) * log(1 - ŷ + 1e-8) return RegressionModel(X, y, h, ℓ) end function MultObjNLPModels.obj(nlp :: RegressionModel, i :: Integer, β :: AbstractVector) NLPModels.@lencheck nlp.meta.nvar β NLPModels.@rangecheck 1 nlp.meta.nobj i increment!(nlp, :neval_obji, i) return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) end function MultObjNLPModels.grad!(nlp :: RegressionModel, i :: Integer, β :: AbstractVector , g :: AbstractVector) NLPModels.@lencheck nlp.meta.nvar β g NLPModels.@rangecheck 1 nlp.meta.nobj i increment!(nlp, :neval_gradi, i) ForwardDiff.gradient!(g, β->nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)), β) return g end \ No newline at end of file
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index f012a54..30fba41 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -7,49 +7,79 @@ export sthocastic_gradient function sthocastic_gradient( nlp::AbstractMultObjModel; - γ = 1e-2, - max_eval=10000, - max_time = 60.0, - atol = 1e-8, - rtol = 1e-8 + learning_rate::Symbol = :optimal, + γ::Float64 = 1e-2, + α::Float64 = 1e-2, # for penalty + ρ::Float64 = 0.85, # just for elasticnet + penalty::Symbol = :l2, + max_eval::Int=50, + max_time::Float64 = 60.0, + atol::Float64 = 1e-8, + rtol::Float64 = 1e-8, + power_t::Float64 = 1e-2 ) - start_time = time() iter = 0 + start_time = time() β = nlp.meta.x0 + βavg = similar(β) # test later n = nlp.meta.nobj g = similar(β) f = NLPModels.obj(nlp, β) NLPModels.grad!(nlp,β,g) Δt = time() - start_time tired = (Δt > max_time) itermax = iter > max_eval @info log_header([:iter, :f], [Int, Float64], hdr_override=Dict(:f => "f(β)")) @info log_row(Any[iter, f]) P = if penalty == :l2 β -> β elseif penalty == :l1 β -> sign.(β) elseif penalty == :elasticnet β -> ρ * β + (1-ρ) * sign.(β) end for k = 1:max_eval # while !(γ < 0.01 || tired) # sounds good. Doesn't work if learning_rate == :optimal γ = 1/(α * (1e3 + iter)) elseif learning_rate == :invscaling γ = 1e-2 / (iter+1)^(power_t) elseif learning_rate == :constant γ = γ end βavg .= 0 # test later for i in shuffle(1:n) β -= γ * (α * P(β) + grad!(nlp, i, β, g)) βavg += β # test later end βavg = βavg/n # test later Δt = time() - start_time tired = (Δt > max_time) solved = (γ < 1e-2) itermax = iter > max_eval iter+=1 end status = if (iter == max_eval) :first_order elseif tired if Δt >= max_time :max_time elseif itermax :max_eval end else :unknown end return GenericExecutionStats(status, nlp; solution = β, solver_specific=Dict(:βavg=>βavg), elapsed_time=Δt, iter=iter ) end \ No newline at end of file
From e97755adb1f7a3f009877a15e5ecc15f09d1b904 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:49:33 -0300
Subject: [PATCH 06/21] Update src/MultObjNLPModels.jl
Co-authored-by: Abel Siqueira
--- src/MultObjNLPModels.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index 7cf0633..6e0931c 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -8,4 +8,5 @@ include("api.jl") include("regression_model.jl") include("sthocastic_gradient.jl") -end # module \ No newline at end of file +end # module
From a71677f6284c12f29bc5a1b3c89f8ed55b8fe231 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:49:44 -0300
Subject: [PATCH 07/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 30fba41..71d67a1 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -22,7 +22,7 @@ function sthocastic_gradient( iter = 0 start_time = time() β = nlp.meta.x0 - βavg = similar(β) # test later + βavg = similar(β) n = nlp.meta.nobj g = similar(β) f = NLPModels.obj(nlp, β) - NLPModels.grad!(nlp,β,g) + NLPModels.grad!(nlp, β, g) @@ -92,4 +92,4 @@ function sthocastic_gradient( elapsed_time=Δt, iter=iter ) -end \ No newline at end of file +end
From c6c3e942aed74b5241d5e5a893a29457a80bf23b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com> Date: Thu, 8 Oct 2020 11:50:07
-0300 Subject: [PATCH 08/21] Update src/sthocastic_gradient.jl Co-authored-by: Abel Siqueira --- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 71d67a1..01290e8 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -12,7 +12,7 @@ function sthocastic_gradient( α::Float64 = 1e-2, # for penalty ρ::Float64 = 0.85, # just for elasticnet penalty::Symbol = :l2, - max_eval::Int=50, + max_eval::Int = 50, max_time::Float64 = 60.0, atol::Float64 = 1e-8, rtol::Float64 = 1e-8, From 64e4c0f3a2b50f75ff49c5d4b0a63113b058a8e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com> Date: Thu, 8 Oct 2020 11:50:15 -0300 Subject: [PATCH 09/21] Update src/sthocastic_gradient.jl Co-authored-by: Abel Siqueira --- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 01290e8..6b3f286 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -26,7 +26,7 @@ function sthocastic_gradient( n = nlp.meta.nobj g = similar(β) f = NLPModels.obj(nlp, β) - NLPModels.grad!(nlp,β,g) + NLPModels.grad!(nlp, β, g) Δt = time() - start_time From a6b1fe13b2a7bf81a14233d5ad38179dbd122a2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com> Date: Thu, 8 Oct 2020 11:50:26 -0300 Subject: [PATCH 10/21] Update src/sthocastic_gradient.jl Co-authored-by: Abel Siqueira --- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 6b3f286..d09180f 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -70,8 +70,8 @@ function sthocastic_gradient( Δt = time() - start_time tired = (Δt > max_time) solved = (γ < 1e-2) + iter += 1 itermax = iter > max_eval - iter+=1 end status = if (iter == max_eval) From b0209a292a57de505811836ede09ec2a99c41d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com> Date: Thu, 8 Oct 2020 11:51:34 -0300 Subject: [PATCH 11/21] Update src/sthocastic_gradient.jl Co-authored-by: Abel Siqueira --- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index d09180f..1532e4a 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -53,7 +53,7 @@ function sthocastic_gradient( # while !(γ < 0.01 || tired) # sounds good. 
Doesn't work if learning_rate == :optimal - γ = 1/(α * (1e3 + iter)) + γ = 1 / (α * (1e3 + iter)) elseif learning_rate == :invscaling γ = 1e-2 / (iter+1)^(power_t)
From 7278d2fe29294f02fb996c62af9e9d4efb231a4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:51:44 -0300
Subject: [PATCH 12/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 2 -- 1 file changed, 2 deletions(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 1532e4a..880edc9 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -56,8 +56,6 @@ function sthocastic_gradient( γ = 1 / (α * (1e3 + iter)) elseif learning_rate == :invscaling γ = 1e-2 / (iter+1)^(power_t) - elseif learning_rate == :constant - γ = γ end
From 0bb7de3e620d767f3318bfc54e1b10a2c35d7379 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:51:52 -0300
Subject: [PATCH 13/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 880edc9..044ebfe 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -66,8 +66,8 @@ function sthocastic_gradient( βavg = βavg/n # test later Δt = time() - start_time - tired = (Δt > max_time) - solved = (γ < 1e-2) + tired = Δt > max_time + solved = γ < 1e-2 iter += 1 itermax = iter > max_eval end
From 784c95a59e23007e519130f52d6e25f18ba1a66b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:52:04 -0300
Subject: [PATCH 14/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 044ebfe..f85de3d 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -85,9 +85,9 @@ function sthocastic_gradient( end return GenericExecutionStats(status, nlp; - solution = β, - solver_specific=Dict(:βavg=>βavg), - elapsed_time=Δt, - iter=iter - ) + solution=β, + solver_specific=Dict(:βavg => βavg), + elapsed_time=Δt, + iter=iter + ) end
From a1e52bd9e8d4efa6b3e7c123efa93a588964f408 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3%A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:52:26 -0300
Subject: [PATCH 15/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 1 - 1 file changed, 1 deletion(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index f85de3d..6125121 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -29,7 +29,6 @@ function sthocastic_gradient( NLPModels.grad!(nlp, β, g) Δt = time() - start_time - tired = (Δt > max_time) itermax = iter > max_eval
From f0515537965de37a4e3625ea2d682242b40306ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3%A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:52:37 -0300
Subject: [PATCH 16/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1
deletion(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 6125121..38dc51a 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -44,7 +44,7 @@ function sthocastic_gradient( elseif penalty == :l1 β -> sign.(β) elseif penalty == :elasticnet - β -> ρ * β + (1-ρ) * sign.(β) + β -> ρ * β + (1 - ρ) * sign.(β) end
From df0995470c2378d90f0b2d15f2563d3e66773945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:52:50 -0300
Subject: [PATCH 17/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 1 - 1 file changed, 1 deletion(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 38dc51a..660a4ff 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -49,7 +49,6 @@ function sthocastic_gradient( for k = 1:max_eval - # while !(γ < 0.01 || tired) # sounds good. Doesn't work if learning_rate == :optimal γ = 1 / (α * (1e3 + iter))
From 136d220e43bda9affde24adb59b5403b82c52444 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:53:03 -0300
Subject: [PATCH 18/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 660a4ff..2d1a327 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -56,12 +56,12 @@ function sthocastic_gradient( γ = 1e-2 / (iter + 1)^power_t end - βavg .= 0 # test later + βavg .= 0 for i in shuffle(1:n) β -= γ * (α * P(β) + grad!(nlp, i, β, g)) - βavg += β # test later + βavg += β end - βavg = βavg/n # test later + βavg = βavg / n
From 0b47f52ef61da825575a081a2f802bb379ae63f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 11:53:14 -0300
Subject: [PATCH 19/21] Update src/sthocastic_gradient.jl
Co-authored-by: Abel Siqueira
--- src/sthocastic_gradient.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index 2d1a327..b39e069 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -53,7 +53,7 @@ function sthocastic_gradient( if learning_rate == :optimal γ = 1 / (α * (1e3 + iter)) elseif learning_rate == :invscaling - γ = 1e-2 / (iter+1)^(power_t) + γ = 1e-2 / (iter + 1)^power_t
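The βavg bookkeeping finalized in the review patches above keeps, for each epoch, the average of the iterates visited after every stochastic step (averaged SGD), which the solver then reports through solver_specific. A minimal detached sketch of that averaging (the random step below is a stand-in for the real gradient step):

    using Random

    # one epoch: step once per shuffled index, accumulate the visited iterates
    function epoch_average(β, n; γ = 1e-2)
        βavg = zero(β)
        for i in shuffle(1:n)
            β -= γ * randn(length(β))   # stand-in for a stochastic gradient step
            βavg += β                   # accumulate the iterate after the step
        end
        return β, βavg / n              # last iterate and the epoch average
    end

    β, βavg = epoch_average(randn(2), 10)
    println(βavg)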
From 1e5e0659df8cca75fcf2061b53fafa2ef5566447 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 8 Oct 2020 12:46:26 -0300
Subject: [PATCH 20/21] Delete exemplosrogerio.jl
I had been using it just for small tests; it is no longer needed.
--- exemplosrogerio.jl | 97 ---------------------------------------------- 1 file changed, 97 deletions(-) delete mode 100644 exemplosrogerio.jl
diff --git a/exemplosrogerio.jl b/exemplosrogerio.jl deleted file mode 100644 index ddd8583..0000000 --- a/exemplosrogerio.jl +++ /dev/null @@ -1,97 +0,0 @@ -using NLPModels, MultObjNLPModels, Random, LinearAlgebra - -function grad_desc(nlp::AbstractNLPModel; - γ = 1e-2, - max_eval=100000, - max_time = 60.0, - atol = 1e-6, - rtol = 1e-6 - ) - - start_time = time() - iter = 0 - β = nlp.meta.x0 - ∇ℓ(x,y,β) = ForwardDiff.gradient(β->nlp.ℓ(y, nlp.h(x,β)), β) # plain nlp - f = nlp.ℓ(nlp.y, nlp.h(nlp.X,β)) - N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) - - ϵt = atol + rtol*N - Δt = time() - start_time - solved = N < ϵt - tired = (Δt > max_time) || (iter > max_eval) - - @info log_header([:iter, :f], - [Int, Float64], - hdr_override=Dict(:f => "f(β)")) - - @info log_row(Any[iter, f]) - - while !(solved || tired) - # for i=1:nlp.meta.nvar - # β[i] -= γ * MultObjNLPModels.grad!(nlp, i, β) - # end - β -= γ * ∇ℓ(X,y,β) - - Δt = time() - start_time - N = norm(nlp.ℓ(nlp.y, nlp.h(nlp.X,β))) - solved = N < ϵt - tired = (Δt > max_time) || (iter > max_eval) - iter+=1 - end - - status = if solved - :first_order - elseif tired - if Δt >= max_time - :max_time - else - :max_eval - end - else - :unknown - end - - # return GenericExecutionStats(status, nlp; - # solution = β, - # objective=f, - # elapsed_time=Δt, - # iter=iter) - return X*β - end - - -# LINEAR REGRESSION -# h(β, x) = dot(β, x) -# ℓ(y, ŷ) = sum((y - ŷ).^2) / 2 -# modelo = RegressaoModel(X, y, h, ℓ) - -# n,p = [10,2] - -# X = rand(n,p) -# y = [sum(X[i,:]).+randn()*0.5 for i=1:n] - -# println("y=$y") -# output = LinearRegressionModel(X,y) - -# println() -# println(output) -# println() -# println(grad_desc(output)) - -#################################################################### - -# LOGISTIC REGRESSION -n = 50 -xl = sort(rand(n) * 2 .- 1) -yl = [x + 0.2 + randn() * 0.25 > 0 ? 1 : 0 for x in xl] -xl = hcat(ones(n), xl) -println() -println("y",yl) -pred = LogisticRegressionModel(xl,yl) -println(pred) -println() -println(sthocastic_gradient(pred))
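Patch 21 below ("Mini-batches and Regularization") adds batch gradients: grad!(nlp, J, x, g) sums the per-objective gradients over an index set J. A self-contained sketch of mini-batch SGD built on that idea, with Iterators.partition forming the batches (data, names, and step size illustrative):

    using Random, LinearAlgebra

    X = rand(40, 3); y = X * ones(3) .+ 0.05 .* randn(40)

    # ∇fᵢ(β) = -(yᵢ - xᵢᵀβ) xᵢ for fᵢ(β) = ½(yᵢ - xᵢᵀβ)²
    grad_i(i, β) = -(y[i] - dot(X[i, :], β)) * X[i, :]

    # batch gradient over J, the quantity grad!(nlp, J, x, g) computes for the model
    grad_batch(J, β) = sum(grad_i(i, β) for i in J)

    function minibatch_sgd(β; γ = 1e-2, batch = 8, epochs = 50)
        for epoch = 1:epochs
            for J in Iterators.partition(shuffle(1:length(y)), batch)
                β -= γ * grad_batch(J, β)
            end
        end
        return β
    end

    println(minibatch_sgd(zeros(3)))   # should approach [1, 1, 1]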
From 83b3d1d6447f48c1f10efb68dfb94ebeca311fae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rog=C3=A9rio=20Mainardes?= <63982716+RogerioOMDS@users.noreply.github.com>
Date: Thu, 4 Mar 2021 23:59:17 -0300
Subject: [PATCH 21/21] Mini-batches and Regularization
--- src/MultObjNLPModels.jl | 33 ++- src/api.jl | 382 ++++++++++++++++++++----------- src/auxiliary.jl | 61 +++++ src/counters.jl | 84 +++---- src/linear-regression-model.jl | 49 ++++ src/logistic-regression-model.jl | 52 +++++ src/meta.jl | 382 +++++++++++++++---------------- src/regression_model.jl | 100 ++++---- src/sthocastic_gradient.jl | 208 +++++++++-------- 9 files changed, 821 insertions(+), 530 deletions(-) create mode 100644 src/auxiliary.jl create mode 100644 src/linear-regression-model.jl create mode 100644 src/logistic-regression-model.jl
diff --git a/src/MultObjNLPModels.jl b/src/MultObjNLPModels.jl index 6e0931c..2783a7b 100644 --- a/src/MultObjNLPModels.jl +++ b/src/MultObjNLPModels.jl @@ -1,12 +1,21 @@ -module MultObjNLPModels - -using NLPModels - -include("meta.jl") -include("counters.jl") -include("api.jl") - -include("regression_model.jl") -include("sthocastic_gradient.jl") - -end # module +module MultObjNLPModels + +# stdlib +using SparseArrays + +# external packages +using FastClosures + +# JSO packages +using LinearOperators, NLPModels + +include("api.jl") # Defines the model and the functions +include("meta.jl") # Defines the derived meta structure +include("counters.jl") + +include("auxiliary.jl") + +include("regression_model.jl") +include("sthocastic_gradient.jl") + +end # module
diff --git a/src/api.jl b/src/api.jl index edede09..2f7a25e 100644 --- a/src/api.jl +++ b/src/api.jl @@ -1,141 +1,243 @@ -export AbstractMultObjModel, - obj, grad!, hess_structure!, hess_coord!, hprod!, - increment! - -""" - AbstractMultObjModel - -`AbstractMultObjModel` is an abstract type for problems with multiple objective. -The default way of looking into these models is by defining the objective - -```math -f(x) = ∑ᵢ σᵢ fᵢ(x) -```` - -An `AbstractMultObjModel` is expected to have: -- `meta :: MultObjNLPMeta` -""" -abstract type AbstractMultObjModel <: AbstractNLPModel end -" + f = obj(nlp, i, x) + +Evaluate ``fᵢ(x)``, the i-th objective function of `nlp` at `x`.
-""" -function obj end - -""" - g = grad!(nlp, i, x, g) - -Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x` in place. -""" -function grad! end - -""" - hess_structure!(nlp, i, rows, cols) - -Return the structure of the Lagrangian Hessian in sparse coordinate format in place. -""" -function hess_structure! end - -""" - vals = hess_coord!(nlp, i, x, vals) - -Evaluate the i-th objective Hessian at `x` in sparse coordinate format, -i.e., ``∇²fᵢ(x)``, rewriting `vals`. -Only the lower triangle is returned. -""" -function hess_coord! end - -""" - Hv = hprod!(nlp, i, x, v, Hv) - -Evaluate the product of the i-th objective Hessian at `x` with the vector `v` in -place, where the objective Hessian is ``∇²fᵢ(x)``. -""" -function hprod! end - -# NLPModels functions - -function NLPModels.obj(nlp :: AbstractMultObjModel, x :: AbstractVector) - return sum(obj(nlp, i, x) * nlp.meta.weights[i] for i = 1:nlp.meta.nobj) -end - -function NLPModels.grad!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, g :: AbstractVector) where T - fill!(g, zero(T)) - gi = fill!(similar(g), zero(T)) - for i = 1:nlp.meta.nobj - grad!(nlp, i, x, gi) - g .+= nlp.meta.weights[i] * gi - end - return g -end - -function NLPModels.hess_structure!(nlp :: AbstractMultObjModel, hrows :: AbstractVector{<: Integer}, hcols :: AbstractVector{<: Integer}) -end - -function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, hvals :: AbstractVector; obj_weight :: Real=one(T)) where T -end - -function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, hvals :: AbstractVector; obj_weight :: Real=one(T)) where T -end - -function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T - fill!(Hv, zero(T)) - Hiv = fill!(similar(Hv), zero(T)) - for i = 1:nlp.meta.nobj - hprod!(nlp, i, x, v, Hiv) - Hv .+= nlp.meta.weights[i] * Hiv - end - return Hv -end - -function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T +export AbstractMultObjModel + +""" + AbstractMultObjModel + +`AbstractMultObjModel` is an abstract type for problems with multiple objective. +The default way of looking into these models is by defining the objective + +```math +f(x) = ∑ᵢ σᵢ fᵢ(x) +```` + +An `AbstractMultObjModel` is expected to have: +- `meta :: MultObjNLPMeta` +""" +abstract type AbstractMultObjModel <: AbstractNLPModel end + +# New multiple objective functions: + +""" + f = obj(nlp, i, x) + +Evaluate ``fᵢ(x)``, the i-th objective function of `nlp` at `x`. +""" +NLPModels.obj(nlp, i, x) + +""" + g = grad!(nlp, i, x, g) + +Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x` in place. +""" +NLPModels.grad!(nlp, i, x, g) + +""" + g = grad(nlp, i, x) + +Evaluate ``∇fᵢ(x)``, the gradient of the i-th objective function at `x`. +""" +function NLPModels.grad(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + g = zeros(eltype(x), nlp.meta.nvar) + grad!(nlp, i, x, g) +end + +""" + grad!(nlp, J, x, g) + +Evaluate ``∑ᵢ∈J ∇fᵢ(x)``, the sum of the gradients of the i-th objective functions at `x` for every `i` in `J`. +Also known as the batch gradient with batch `J`. 
+""" +function NLPModels.grad!(nlp :: AbstractMultObjModel, J :: AbstractVector{<: Integer}, x :: AbstractVector{T}, g :: AbstractVector) where T + @lencheck nlp.meta.nvar x g + fill!(g, zero(T)) + gi = fill!(similar(g), zero(T)) + for i = J + grad!(nlp, i, x, gi) + g .+= nlp.meta.weights[i] * gi + end + return g +end + +""" + g = grad(nlp, J, x) + +Evaluate ``∑ᵢ∈J ∇fᵢ(x)``, the sum of the gradients of the i-th objective functions at `x` for every `i` in `J`. +Also known as the batch gradient with batch `J`. +""" +function NLPModels.grad!(nlp :: AbstractMultObjModel, J :: AbstractVector{<: Integer}, x :: AbstractVector{T}) where T + g = similar(x) + return grad!(nlp, J, x, g) +end + +""" + hess_structure!(nlp, i, rows, cols) + +Return the structure of the Lagrangian Hessian in sparse coordinate format in place. +""" +NLPModels.hess_structure!(nlp, i, rows, cols) + +""" + hess_structure!(nlp, i) + +Return the structure of the Lagrangian Hessian in sparse coordinate format. +""" +function hess_structure(nlp :: AbstractMultObjModel, i :: Integer) + NLPModels.@rangecheck 1 nlp.meta.nobj i + rows = zeros(Int, nlp.meta.nnzhi[i]) + cols = zeros(Int, nlp.meta.nnzhi[i]) + hess_structure!(nlp, i, rows, cols) +end + +""" + vals = hess_coord!(nlp, i, x, vals) + +Evaluate the i-th objective Hessian at `x` in sparse coordinate format, +i.e., ``∇²fᵢ(x)``, rewriting `vals`. +Only the lower triangle is returned. +""" +NLPModels.hess_coord!(nlp, i, x, vals) + +""" + vals = hess_coord(nlp, i, x) + +Evaluate the i-th objective Hessian at `x` in sparse coordinate format, +i.e., ``∇²fᵢ(x)``. +Only the lower triangle is returned. +""" +function hess_coord(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + vals = zeros(eltype(x), nlp.meta.nnzhi[i]) + hess_coord!(nlp, i, x, vals) +end + +""" + Hv = hprod!(nlp, i, x, v, Hv) + +Evaluate the product of the i-th objective Hessian at `x` with the vector `v` in +place, where the objective Hessian is ``∇²fᵢ(x)``. +""" +NLPModels.hprod!(nlp, i, x, v, Hv) + +""" + Hv = hprod(nlp, i, x, v) + +Evaluate the product of the i-th objective Hessian at `x` with the vector `v`, +where the objective Hessian is ``∇²fᵢ(x)``. +""" +function hprod(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector, v :: AbstractVector) + @lencheck nlp.meta.nvar x v + NLPModels.@rangecheck 1 nlp.meta.nobj i + Hv = zeros(eltype(x), nlp.meta.nvar) + hprod!(nlp, i, x, v, Hv) +end + +""" + Hx = hess(nlp, i, x) + +Evaluate the i-th objective Hessian at `x` as a sparse matrix. +Only the lower triangle is returned. +""" +function NLPModels.hess(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + rows, cols = hess_structure(nlp, i) + vals = hess_coord(nlp, i, x) + sparse(rows, cols, vals, nlp.meta.nvar, nlp.meta.nvar) +end + +""" + H = hess_op(nlp, i, x) + +Return the objective Hessian at `x` as a linear operator. +The resulting object may be used as if it were a +matrix, e.g., `H * v`. +""" +function hess_op(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector) + @lencheck nlp.meta.nvar x + NLPModels.@rangecheck 1 nlp.meta.nobj i + prod = @closure v -> hprod(nlp, i, x, v) + return LinearOperator{eltype(x)}(nlp.meta.nvar, nlp.meta.nvar, + true, true, prod, prod, prod) +end + +""" + H = hess_op!(nlp, i, x, Hv) + +Return the objective Hessian at `x` as a linear operator. 
+The resulting object may be used as if it were a matrix, e.g., `H * v`. +The vector `Hv` is used as preallocated storage for the operation. +""" +function hess_op!(nlp :: AbstractMultObjModel, i :: Integer, x :: AbstractVector, Hv :: AbstractVector) + @lencheck nlp.meta.nvar x Hv + NLPModels.@rangecheck 1 nlp.meta.nobj i + prod = @closure v -> hprod!(nlp, i, x, v, Hv) + return LinearOperator{eltype(x)}(nlp.meta.nvar, nlp.meta.nvar, + true, true, prod, prod, prod) +end + +# NLPModels functions + +function NLPModels.obj(nlp :: AbstractMultObjModel, x :: AbstractVector) + @lencheck nlp.meta.nvar x + return sum(obj(nlp, i, x) * nlp.meta.weights[i] for i = 1:nlp.meta.nobj) +end + +function NLPModels.grad!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, g :: AbstractVector) where T + @lencheck nlp.meta.nvar x g + fill!(g, zero(T)) + gi = fill!(similar(g), zero(T)) + for i = 1:nlp.meta.nobj + grad!(nlp, i, x, gi) + g .+= nlp.meta.weights[i] * gi + end + return g +end + +# ∇²f(x) = ∑ᵢ σᵢ ∇²fᵢ(x) +# rows = [rows₁, …, rowsₚ] +# cols = [cols₁, …, colsₚ] +function NLPModels.hess_structure!(nlp :: AbstractMultObjModel, rows :: AbstractVector{<: Integer}, cols :: AbstractVector{<: Integer}) + @lencheck nlp.meta.nnzh rows cols + c = 0 + for i = 1:nlp.meta.nobj + nnzi = nlp.meta.nnzhi[i] + idx = c .+ (1:nnzi) + @views hess_structure!(nlp, i, rows[idx], cols[idx]) + c += nnzi + end + return rows, cols +end + +function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, vals :: AbstractVector; obj_weight :: Real=one(T)) where T + @lencheck nlp.meta.nvar x + @lencheck nlp.meta.nnzh vals + c = 0 + for i = 1:nlp.meta.nobj + nnzi = nlp.meta.nnzhi[i] + idx = c .+ (1:nnzi) + @views hess_coord!(nlp, i, x, vals[idx]) + vals[idx] .*= obj_weight * nlp.meta.weights[i] + c += nnzi + end + return vals +end + +function NLPModels.hess_coord!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, vals :: AbstractVector; obj_weight :: Real=one(T)) where T +end + +function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T + @lencheck nlp.meta.nvar x v Hv + fill!(Hv, zero(T)) + Hiv = fill!(similar(Hv), zero(T)) + for i = 1:nlp.meta.nobj + hprod!(nlp, i, x, v, Hiv) + Hv .+= nlp.meta.weights[i] * Hiv + end + return Hv +end + +function NLPModels.hprod!(nlp :: AbstractMultObjModel, x :: AbstractVector{T}, y :: AbstractVector, v :: AbstractVector, Hv :: AbstractVector; obj_weight :: Real=one(T)) where T end \ No newline at end of file diff --git a/src/auxiliary.jl b/src/auxiliary.jl new file mode 100644 index 0000000..c1904be --- /dev/null +++ b/src/auxiliary.jl @@ -0,0 +1,61 @@ +export increment! + +sum_counters(nls :: AbstractMultObjModel) = sum_counters(nls.counters) + +import Base.show +show_header(io :: IO, nlp :: AbstractMultObjModel) = println(io, typeof(nlp)) + +function show(io :: IO, nlp :: AbstractMultObjModel) + show_header(io, nlp) + show(io, nlp.meta) + show(io, nlp.counters) +end + +for counter in fieldnames(MultObjCounters) + counter == :counters && continue + @eval begin + """ + $($counter)(nlp) + + Get the number of `$(split("$($counter)", "_")[2])` evaluations. + """ + $counter(nls :: AbstractMultObjModel) = nls.counters.$counter + export $counter + end +end + +for counter in fieldnames(Counters) + @eval begin + NLPModels.$counter(nls :: AbstractMultObjModel) = nls.counters.counters.$counter + export $counter + end +end + +import NLPModels.increment! 
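+
+# Usage sketch for the accessors generated above (hypothetical model `nlp`
+# whose first objective was evaluated twice):
+#
+#     neval_obji(nlp)  # per-objective counts, e.g. [2, 0, 0]
+#     neval_obj(nlp)   # scalar counter, forwarded from nlp.counters.counters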
+ +""" + increment!(nlp, s, i) + +Increment counter `s[i]` of problem `nlp`. +""" +function increment!(nlp :: AbstractMultObjModel, s :: Symbol, i :: Integer) + getproperty(nlp.counters, s)[i] += 1 +end + +function NLPModels.increment!(nlp :: AbstractMultObjModel, s :: Symbol) + setproperty!(nlp.counters.counters, s, getproperty(nlp.counters.counters, s) + 1) +end + +function NLPModels.reset!(nls :: AbstractMultObjModel) + reset!(nls.counters) + return nls +end + +# TODO: Make these functions more general inside NLPModels.jl: + +NLPModels.has_bounds(meta::MultObjNLPMeta) = length(meta.ifree) < meta.nvar +NLPModels.bound_constrained(meta::MultObjNLPMeta) = meta.ncon == 0 && has_bounds(meta) +NLPModels.unconstrained(meta::MultObjNLPMeta) = meta.ncon == 0 && !has_bounds(meta) +NLPModels.linearly_constrained(meta::MultObjNLPMeta) = meta.nlin == meta.ncon > 0 +NLPModels.equality_constrained(meta::MultObjNLPMeta) = length(meta.jfix) == meta.ncon > 0 +NLPModels.inequality_constrained(meta::MultObjNLPMeta) = meta.ncon > 0 && length(meta.jfix) == 0 \ No newline at end of file diff --git a/src/counters.jl b/src/counters.jl index f7284d3..5cc517c 100644 --- a/src/counters.jl +++ b/src/counters.jl @@ -1,48 +1,36 @@ -export MultObjCounters - -mutable struct MultObjCounters - counters :: Counters - neval_obji :: Vector{Int} # Number of individual objective evaluations - neval_gradi :: Vector{Int} # Number of individual objective gradient evaluations. - neval_hessi :: Vector{Int} # Number of individual objective Hessian evaluations. - neval_hiprod :: Vector{Int} # Number of individual objective Hessian-vector products. - - function MultObjCounters(nobj) - return new(Counters(), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj)) - end -end - -import Base.getproperty, Base.setproperty! -function getproperty(c :: MultObjCounters, f :: Symbol) - if f in fieldnames(Counters) - getfield(c.counters, f) - else - getfield(c, f) - end -end - -function setproperty!(c :: MultObjCounters, f :: Symbol, x) - if f in fieldnames(Counters) - setfield!(c.counters, f, x) - else - setfield!(c, f, x) - end -end - -function sum_counters(c :: MultObjCounters) - s = sum_counters(c.counters) - for field in fieldnames(MultObjCounters) - field == :counters && continue - s += sum(getfield(c, field)) - end - return s -end - -function NLPModels.reset!(c :: MultObjCounters) - for f in fieldnames(MultObjCounters) - f == :counters && continue - fill!(getfield(c, f), 0) - end - reset!(c.counters) - return c -end +export MultObjCounters + +mutable struct MultObjCounters + counters :: Counters + neval_obji :: Vector{Int} # Number of individual objective evaluations + neval_gradi :: Vector{Int} # Number of individual objective gradient evaluations. + neval_hessi :: Vector{Int} # Number of individual objective Hessian evaluations. + neval_hiprod :: Vector{Int} # Number of individual objective Hessian-vector products. + + function MultObjCounters(nobj) + return new(Counters(), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj), zeros(Int, nobj)) + end +end + +import Base.getproperty, Base.setproperty! 
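+
+# Sketch of the bookkeeping behind `sum_counters` below (hypothetical
+# two-objective model):
+#
+#     c = MultObjCounters(2)
+#     c.neval_obji[1] += 1
+#     c.neval_gradi[2] += 1
+#     sum_counters(c)  # == 2: the scalar counters plus every per-objective vector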
+ +function sum_counters(c :: MultObjCounters) + s = 0 + for field in fieldnames(Counters) + s += getfield(c.counters, field) + end + for field in fieldnames(MultObjCounters) + field == :counters && continue + s += sum(getfield(c, field)) + end + return s +end + +function NLPModels.reset!(c :: MultObjCounters) + for f in fieldnames(MultObjCounters) + f == :counters && continue + fill!(getfield(c, f), 0) + end + reset!(c.counters) + return c +end diff --git a/src/linear-regression-model.jl b/src/linear-regression-model.jl new file mode 100644 index 0000000..9490c58 --- /dev/null +++ b/src/linear-regression-model.jl @@ -0,0 +1,49 @@ +export LinearRegressionModel + +mutable struct LinearRegressionModel <: AbstractRegressionModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function LinearRegressionModel(X, y) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + + h(x, β) = dot(x, β) + ℓ(y, ŷ) = (y - ŷ)^2 / 2 + return LinearRegressionModel(meta, counters, X, y, h, ℓ) +end + +function NLPModels.obj(nlp :: LinearRegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + return @views (nlp.y[i] - dot(nlp.X[i,:], β)) ^ 2 / 2 +end + +function NLPModels.grad!(nlp :: LinearRegressionModel, i :: Integer, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + xi = @view nlp.X[i,:] + g .= (dot(xi, β) - nlp.y[i]) * xi + return g +end + +function NLPModels.grad!(nlp :: LinearRegressionModel, J :: AbstractVector{<:Integer}, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + for i in J + increment!(nlp, :neval_gradi, i) + end + XJ = @view nlp.X[J,:] + yJ = @view nlp.y[J] + g .= XJ' * (XJ * β - yJ) + return g +end \ No newline at end of file diff --git a/src/logistic-regression-model.jl b/src/logistic-regression-model.jl new file mode 100644 index 0000000..f86fe2f --- /dev/null +++ b/src/logistic-regression-model.jl @@ -0,0 +1,52 @@ +export LogisticRegressionModel + +mutable struct LogisticRegressionModel <: AbstractRegressionModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function LogisticRegressionModel(X, y) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + + h(x, β) = 1 / (1 + exp(-dot(x, β))) + ℓ(y, ŷ) = -y * log(ŷ + 1e-8) - (1 - y) * log(1 - ŷ + 1e-8) + return LogisticRegressionModel(meta, counters, X, y, h, ℓ) +end + +function NLPModels.obj(nlp :: LogisticRegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + xi = @view nlp.X[i,:] + ŷ = 1 / (1 + exp(-dot(xi, β))) + yi = nlp.y[i] + return -yi * log(ŷ + 1e-8) - (1 - yi) * log(1 - ŷ + 1e-8) +end + +function NLPModels.grad!(nlp :: LogisticRegressionModel, i :: Integer, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + xi = @view nlp.X[i,:] + g .= (1 / (1 + exp(-dot(xi, β))) - nlp.y[i]) * xi + return g +end + +function NLPModels.grad!(nlp :: LogisticRegressionModel, J :: AbstractVector{<:Integer}, β :: AbstractVector, g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + 
for i in J + increment!(nlp, :neval_gradi, i) + end + XJ = @view nlp.X[J,:] + yJ = @view nlp.y[J] + g .= XJ' * (1 ./ (1 .+ exp.(-XJ * β)) .- yJ) + return g +end \ No newline at end of file diff --git a/src/meta.jl b/src/meta.jl index 3ab2696..4855ca7 100644 --- a/src/meta.jl +++ b/src/meta.jl @@ -1,192 +1,192 @@ -export MultObjNLPMeta - -import NLPModels.@rangecheck - -struct MultObjNLPMeta <: AbstractNLPModelMeta - - # A composite type that represents the main features of - # the optimization problem - # - # optimize obj(x) = ∑ᵢ σᵢ objᵢ(x) - # subject to lvar ≤ x ≤ uvar - # lcon ≤ cons(x) ≤ ucon - # - # where x is an nvar-dimensional vector, - # σᵢ are the weights of the objectives, - # objᵢ are each real-valued objective functions, - # cons is the vector-valued constraint function, - # optimize is either "minimize" or "maximize". - # - # Here, lvar, uvar, lcon and ucon are vectors. Some of their - # components may be infinite to indicate that the corresponding - # bound or general constraint is not present. - - nobj :: Int # number of objectives - weights :: Vector # weights of each objective - objtypes :: Vector{Symbol} # types of the objectives (:lin, :quad, :gen) - nnzhi :: Vector{Int} # number of elements needed to store the nonzeros in the sparse Hessian of the i-th objective - - nvar :: Int # number of variables - x0 :: Vector # initial guess - lvar :: Vector # vector of lower bounds - uvar :: Vector # vector of upper bounds - - ifix :: Vector{Int} # indices of fixed variables - ilow :: Vector{Int} # indices of variables with lower bound only - iupp :: Vector{Int} # indices of variables with upper bound only - irng :: Vector{Int} # indices of variables with lower and upper bound (range) - ifree :: Vector{Int} # indices of free variables - iinf :: Vector{Int} # indices of infeasible bounds - - nbv :: Int # number of linear binary variables - niv :: Int # number of linear non-binary integer variables - nlvb :: Int # number of nonlinear variables in both objectives and constraints - nlvo :: Int # number of nonlinear variables in objectives (includes nlvb) - nlvc :: Int # number of nonlinear variables in constraints (includes nlvb) - nlvbi :: Int # number of integer nonlinear variables in both objectives and constraints - nlvci :: Int # number of integer nonlinear variables in constraints only - nlvoi :: Int # number of integer nonlinear variables in objectives only - nwv :: Int # number of linear network (arc) variables - - ncon :: Int # number of general constraints - y0 :: Vector # initial Lagrange multipliers - lcon :: Vector # vector of constraint lower bounds - ucon :: Vector # vector of constraint upper bounds - - jfix :: Vector{Int} # indices of equality constraints - jlow :: Vector{Int} # indices of constraints of the form c(x) ≥ cl - jupp :: Vector{Int} # indices of constraints of the form c(x) ≤ cu - jrng :: Vector{Int} # indices of constraints of the form cl ≤ c(x) ≤ cu - jfree :: Vector{Int} # indices of "free" constraints (there shouldn't be any) - jinf :: Vector{Int} # indices of the visibly infeasible constraints - - nnzo :: Int # number of nonzeros in all objectives gradients - nnzj :: Int # number of elements needed to store the nonzeros in the sparse Jacobian - nnzh :: Int # number of elements needed to store the nonzeros in the sparse Hessian - - nlin :: Int # number of linear constraints - nnln :: Int # number of nonlinear general constraints - nnnet :: Int # number of nonlinear network constraints - nlnet :: Int # number of linear network constraints - - lin :: 
Vector{Int} # indices of linear constraints - nln :: Vector{Int} # indices of nonlinear constraints - nnet :: Vector{Int} # indices of nonlinear network constraints - lnet :: Vector{Int} # indices of linear network constraints - - minimize :: Bool # true if optimize == minimize - nlo :: Int # number of nonlinear objectives - islp :: Bool # true if the problem is a linear program - name :: String # problem name - - function MultObjNLPMeta(nvar, nobj; - x0=zeros(nvar,), - lvar=-Inf * ones(nvar,), - uvar=Inf * ones(nvar,), - weights=ones(nobj), - objtypes=fill(:gen, nobj), - nnzhi=fill(div(nvar * (nvar + 1), 2), nobj), - nbv=0, - niv=0, - nlvb=nvar, - nlvo=nvar, - nlvc=nvar, - nlvbi=0, - nlvci=0, - nlvoi=0, - nwv=0, - ncon=0, - y0=zeros(ncon,), - lcon=-Inf * ones(ncon,), - ucon=Inf * ones(ncon,), - nnzo=nvar, - nnzj=nvar * ncon, - nnzh=nvar * (nvar + 1) / 2, - lin=Int[], - nln=1:ncon, - nnet=Int[], - lnet=Int[], - nlin=length(lin), - nnln=length(nln), - nnnet=length(nnet), - nlnet=length(lnet), - minimize=true, - nlo=1, - islp=false, - name="Generic") - if (nvar < 1) || (ncon < 0) - error("Nonsensical dimensions") - end - - @lencheck nobj weights objtypes nnzhi - @lencheck nvar x0 lvar uvar - @lencheck ncon y0 lcon ucon - @lencheck nlin lin - @lencheck nnln nln - @lencheck nnnet nnet - @lencheck nlnet lnet - @rangecheck 1 ncon lin nln nnet lnet - if !(unique(objtypes) ⊆ [:lin, :quad, :gen]) - error("The objective types should be chosen from the set [:lin, :quad, :gen]") - end - nnzhi .= max.(0, nnzhi) - - ifix = findall(lvar .== uvar) - ilow = findall((lvar .> -Inf) .& (uvar .== Inf)) - iupp = findall((lvar .== -Inf) .& (uvar .< Inf)) - irng = findall((lvar .> -Inf) .& (uvar .< Inf) .& (lvar .< uvar)) - ifree = findall((lvar .== -Inf) .& (uvar .== Inf)) - iinf = findall(lvar .> uvar) - - jfix = findall(lcon .== ucon) - jlow = findall((lcon .> -Inf) .& (ucon .== Inf)) - jupp = findall((lcon .== -Inf) .& (ucon .< Inf)) - jrng = findall((lcon .> -Inf) .& (ucon .< Inf) .& (lcon .< ucon)) - jfree = findall((lcon .== -Inf) .& (ucon .== Inf)) - jinf = findall(lcon .> ucon) - - nnzj = max(0, nnzj) - nnzh = max(0, nnzh) - - new(nobj, weights, objtypes, nnzhi, - nvar, x0, lvar, uvar, - ifix, ilow, iupp, irng, ifree, iinf, - nbv, niv, nlvb, nlvo, nlvc, - nlvbi, nlvci, nlvoi, nwv, - ncon, y0, lcon, ucon, - jfix, jlow, jupp, jrng, jfree, jinf, - nnzo, nnzj, nnzh, - nlin, nnln, nnnet, nlnet, lin, nln, nnet, lnet, - minimize, nlo, islp, name) - end -end - -import NLPModels: histline, lines_of_hist, sparsityline - -function lines_of_description(m :: MultObjNLPMeta) - V = [length(m.ifree), length(m.ilow), length(m.iupp), length(m.irng), length(m.ifix), length(m.iinf)] - V = [sum(V); V] - S = ["All variables", "free", "lower", "upper", "low/upp", "fixed", "infeas"] - varlines = lines_of_hist(S, V) - push!(varlines, sparsityline("nnzh", m.nnzh, m.nvar * (m.nvar + 1) / 2)) - - V = [length(m.jfree), length(m.jlow), length(m.jupp), length(m.jrng), length(m.jfix), length(m.jinf)] - V = [sum(V); V] - S = ["All constraints", "free", "lower", "upper", "low/upp", "fixed", "infeas"] - conlines = lines_of_hist(S, V) - push!(conlines, histline("linear", m.nlin, m.ncon), histline("nonlinear", m.nnln, m.ncon)) - push!(conlines, sparsityline("nnzj", m.nnzj, m.nvar * m.ncon)) - - append!(varlines, repeat([" "^length(varlines[1])], length(conlines) - length(varlines))) - lines = [" TODO: Improve this"; varlines .* conlines] - - return lines -end - -import Base.show -function show(io :: IO, m :: MultObjNLPMeta) - println(io, " 
Problem name: $(m.name)") - println(io, " Number of objectives: $(m.nobj)") - lines = lines_of_description(m) - println(io, join(lines, "\n") * "\n") +export MultObjNLPMeta + +import NLPModels.@rangecheck + +struct MultObjNLPMeta <: AbstractNLPModelMeta + + # A composite type that represents the main features of + # the optimization problem + # + # optimize obj(x) = ∑ᵢ σᵢ objᵢ(x) + # subject to lvar ≤ x ≤ uvar + # lcon ≤ cons(x) ≤ ucon + # + # where x is an nvar-dimensional vector, + # σᵢ are the weights of the objectives, + # objᵢ are each real-valued objective functions, + # cons is the vector-valued constraint function, + # optimize is either "minimize" or "maximize". + # + # Here, lvar, uvar, lcon and ucon are vectors. Some of their + # components may be infinite to indicate that the corresponding + # bound or general constraint is not present. + + nobj :: Int # number of objectives + weights :: Vector # weights of each objective + objtypes :: Vector{Symbol} # types of the objectives (:lin, :quad, :gen) + nnzhi :: Vector{Int} # number of elements needed to store the nonzeros in the sparse Hessian of the i-th objective + + nvar :: Int # number of variables + x0 :: Vector # initial guess + lvar :: Vector # vector of lower bounds + uvar :: Vector # vector of upper bounds + + ifix :: Vector{Int} # indices of fixed variables + ilow :: Vector{Int} # indices of variables with lower bound only + iupp :: Vector{Int} # indices of variables with upper bound only + irng :: Vector{Int} # indices of variables with lower and upper bound (range) + ifree :: Vector{Int} # indices of free variables + iinf :: Vector{Int} # indices of infeasible bounds + + nbv :: Int # number of linear binary variables + niv :: Int # number of linear non-binary integer variables + nlvb :: Int # number of nonlinear variables in both objectives and constraints + nlvo :: Int # number of nonlinear variables in objectives (includes nlvb) + nlvc :: Int # number of nonlinear variables in constraints (includes nlvb) + nlvbi :: Int # number of integer nonlinear variables in both objectives and constraints + nlvci :: Int # number of integer nonlinear variables in constraints only + nlvoi :: Int # number of integer nonlinear variables in objectives only + nwv :: Int # number of linear network (arc) variables + + ncon :: Int # number of general constraints + y0 :: Vector # initial Lagrange multipliers + lcon :: Vector # vector of constraint lower bounds + ucon :: Vector # vector of constraint upper bounds + + jfix :: Vector{Int} # indices of equality constraints + jlow :: Vector{Int} # indices of constraints of the form c(x) ≥ cl + jupp :: Vector{Int} # indices of constraints of the form c(x) ≤ cu + jrng :: Vector{Int} # indices of constraints of the form cl ≤ c(x) ≤ cu + jfree :: Vector{Int} # indices of "free" constraints (there shouldn't be any) + jinf :: Vector{Int} # indices of the visibly infeasible constraints + + nnzo :: Int # number of nonzeros in all objectives gradients + nnzj :: Int # number of elements needed to store the nonzeros in the sparse Jacobian + nnzh :: Int # number of elements needed to store the nonzeros in the sparse Hessian + + nlin :: Int # number of linear constraints + nnln :: Int # number of nonlinear general constraints + nnnet :: Int # number of nonlinear network constraints + nlnet :: Int # number of linear network constraints + + lin :: Vector{Int} # indices of linear constraints + nln :: Vector{Int} # indices of nonlinear constraints + nnet :: Vector{Int} # indices of nonlinear network constraints + 
lnet :: Vector{Int} # indices of linear network constraints + + minimize :: Bool # true if optimize == minimize + nlo :: Int # number of nonlinear objectives + islp :: Bool # true if the problem is a linear program + name :: String # problem name + + function MultObjNLPMeta(nvar, nobj; + x0=zeros(nvar,), + lvar=-Inf * ones(nvar,), + uvar=Inf * ones(nvar,), + weights=ones(nobj), + objtypes=fill(:gen, nobj), + nnzhi=fill(div(nvar * (nvar + 1), 2), nobj), + nbv=0, + niv=0, + nlvb=nvar, + nlvo=nvar, + nlvc=nvar, + nlvbi=0, + nlvci=0, + nlvoi=0, + nwv=0, + ncon=0, + y0=zeros(ncon,), + lcon=-Inf * ones(ncon,), + ucon=Inf * ones(ncon,), + nnzo=nvar, + nnzj=nvar * ncon, + nnzh=nvar * (nvar + 1) / 2, + lin=Int[], + nln=1:ncon, + nnet=Int[], + lnet=Int[], + nlin=length(lin), + nnln=length(nln), + nnnet=length(nnet), + nlnet=length(lnet), + minimize=true, + nlo=1, + islp=false, + name="Generic") + if (nvar < 1) || (ncon < 0) + error("Nonsensical dimensions") + end + + @lencheck nobj weights objtypes nnzhi + @lencheck nvar x0 lvar uvar + @lencheck ncon y0 lcon ucon + @lencheck nlin lin + @lencheck nnln nln + @lencheck nnnet nnet + @lencheck nlnet lnet + @rangecheck 1 ncon lin nln nnet lnet + if !(unique(objtypes) ⊆ [:lin, :quad, :gen]) + error("The objective types should be chosen from the set [:lin, :quad, :gen]") + end + nnzhi .= max.(0, nnzhi) + + ifix = findall(lvar .== uvar) + ilow = findall((lvar .> -Inf) .& (uvar .== Inf)) + iupp = findall((lvar .== -Inf) .& (uvar .< Inf)) + irng = findall((lvar .> -Inf) .& (uvar .< Inf) .& (lvar .< uvar)) + ifree = findall((lvar .== -Inf) .& (uvar .== Inf)) + iinf = findall(lvar .> uvar) + + jfix = findall(lcon .== ucon) + jlow = findall((lcon .> -Inf) .& (ucon .== Inf)) + jupp = findall((lcon .== -Inf) .& (ucon .< Inf)) + jrng = findall((lcon .> -Inf) .& (ucon .< Inf) .& (lcon .< ucon)) + jfree = findall((lcon .== -Inf) .& (ucon .== Inf)) + jinf = findall(lcon .> ucon) + + nnzj = max(0, nnzj) + nnzh = max(0, nnzh) + + new(nobj, weights, objtypes, nnzhi, + nvar, x0, lvar, uvar, + ifix, ilow, iupp, irng, ifree, iinf, + nbv, niv, nlvb, nlvo, nlvc, + nlvbi, nlvci, nlvoi, nwv, + ncon, y0, lcon, ucon, + jfix, jlow, jupp, jrng, jfree, jinf, + nnzo, nnzj, nnzh, + nlin, nnln, nnnet, nlnet, lin, nln, nnet, lnet, + minimize, nlo, islp, name) + end +end + +import NLPModels: histline, lines_of_hist, sparsityline + +function lines_of_description(m :: MultObjNLPMeta) + V = [length(m.ifree), length(m.ilow), length(m.iupp), length(m.irng), length(m.ifix), length(m.iinf)] + V = [sum(V); V] + S = ["All variables", "free", "lower", "upper", "low/upp", "fixed", "infeas"] + varlines = lines_of_hist(S, V) + push!(varlines, sparsityline("nnzh", m.nnzh, m.nvar * (m.nvar + 1) / 2)) + + V = [length(m.jfree), length(m.jlow), length(m.jupp), length(m.jrng), length(m.jfix), length(m.jinf)] + V = [sum(V); V] + S = ["All constraints", "free", "lower", "upper", "low/upp", "fixed", "infeas"] + conlines = lines_of_hist(S, V) + push!(conlines, histline("linear", m.nlin, m.ncon), histline("nonlinear", m.nnln, m.ncon)) + push!(conlines, sparsityline("nnzj", m.nnzj, m.nvar * m.ncon)) + + append!(varlines, repeat([" "^length(varlines[1])], length(conlines) - length(varlines))) + lines = [" TODO: Improve this"; varlines .* conlines] + + return lines +end + +import Base.show +function show(io :: IO, m :: MultObjNLPMeta) + println(io, " Problem name: $(m.name)") + println(io, " Number of objectives: $(m.nobj)") + lines = lines_of_description(m) + println(io, join(lines, "\n") * "\n") end \ No newline 
at end of file diff --git a/src/regression_model.jl b/src/regression_model.jl index 8ed6b28..c7f26d8 100644 --- a/src/regression_model.jl +++ b/src/regression_model.jl @@ -1,48 +1,52 @@ -export RegressionModel, LinearRegressionModel, LogisticRegressionModel - -using ForwardDiff, LinearAlgebra - -mutable struct RegressionModel <: AbstractMultObjModel - meta :: MultObjNLPMeta - counters :: MultObjCounters - - X :: Matrix - y :: Vector - h # h(x, w) - ℓ # ℓ(y, ŷ) -end - -function RegressionModel(X, y, h, ℓ) - n, p = size(X) - - meta = MultObjNLPMeta(p, n) - counters = MultObjCounters(n) - return RegressionModel(meta, counters, X, y, h, ℓ) -end - -function LinearRegressionModel(X, y) - h(x, β) = dot(x,β) - ℓ(y, ŷ) = (y - ŷ)^2 / 2 - return RegressionModel(X, y, h, ℓ) -end - -function LogisticRegressionModel(X, y) - h(x, β) = 1 / (1 + exp(-dot(x, β))) - ℓ(y, ŷ) = -y * log(ŷ + 1e-8) - (1 - y) * log(1 - ŷ + 1e-8) - return RegressionModel(X, y, h, ℓ) -end - -function MultObjNLPModels.obj(nlp :: RegressionModel, i :: Integer, β :: AbstractVector) - NLPModels.@lencheck nlp.meta.nvar β - NLPModels.@rangecheck 1 nlp.meta.nobj i - increment!(nlp, :neval_obji, i) - return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) -end - -function MultObjNLPModels.grad!(nlp :: RegressionModel, i :: Integer, β :: AbstractVector , g :: AbstractVector) - NLPModels.@lencheck nlp.meta.nvar β g - NLPModels.@rangecheck 1 nlp.meta.nobj i - increment!(nlp, :neval_gradi, i) - ForwardDiff.gradient!(g, β->nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)), β) - return g -end \ No newline at end of file +export RegressionModel, LogisticRegressionModel + +using ForwardDiff, LinearAlgebra + +abstract type AbstractRegressionModel <: AbstractMultObjModel end + +include("linear-regression-model.jl") +include("logistic-regression-model.jl") + +mutable struct RegressionModel <: AbstractRegressionModel + meta :: MultObjNLPMeta + counters :: MultObjCounters + + X :: Matrix + y :: Vector + h # h(x, w) + ℓ # ℓ(y, ŷ) +end + +function RegressionModel(X, y, h, ℓ) + n, p = size(X) + + meta = MultObjNLPMeta(p, n) + counters = MultObjCounters(n) + return RegressionModel(meta, counters, X, y, h, ℓ) +end + +function NLPModels.obj(nlp :: RegressionModel, i :: Integer, β :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_obji, i) + return nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) +end + +function NLPModels.grad!(nlp :: RegressionModel, i :: Integer, β :: AbstractVector , g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + NLPModels.@rangecheck 1 nlp.meta.nobj i + increment!(nlp, :neval_gradi, i) + ForwardDiff.gradient!(g, β->nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)), β) + return g +end + +function NLPModels.grad!(nlp :: RegressionModel, J :: AbstractVector{<: Integer}, β :: AbstractVector , g :: AbstractVector) + NLPModels.@lencheck nlp.meta.nvar β g + for i in J + increment!(nlp, :neval_gradi, i) + end + ForwardDiff.gradient!(g, + β -> sum(nlp.ℓ(nlp.y[i], nlp.h(nlp.X[i,:], β)) for i in J), + β) + return g +end \ No newline at end of file diff --git a/src/sthocastic_gradient.jl b/src/sthocastic_gradient.jl index b39e069..5ab02b3 100644 --- a/src/sthocastic_gradient.jl +++ b/src/sthocastic_gradient.jl @@ -1,91 +1,117 @@ -using LinearAlgebra, Random, SolverTools - -NLPModels.has_bounds(meta::MultObjNLPMeta) = length(meta.ifree) < meta.nvar -NLPModels.unconstrained(meta::MultObjNLPMeta) = meta.ncon == 0 && !has_bounds(meta) - -export sthocastic_gradient - -function sthocastic_gradient( - 
nlp::AbstractMultObjModel;
-  learning_rate::Symbol = :optimal,
-  γ::Float64 = 1e-2,
-  α::Float64 = 1e-2, # for penalty
-  ρ::Float64 = 0.85, # just for elasticnet
-  penalty::Symbol = :l2,
-  max_eval::Int = 50,
-  max_time::Float64 = 60.0,
-  atol::Float64 = 1e-8,
-  rtol::Float64 = 1e-8,
-  power_t::Float64 = 1e-2
-  )
-
-  iter = 0
-  start_time = time()
-  β = nlp.meta.x0
-  βavg = similar(β)
-  n = nlp.meta.nobj
-  g = similar(β)
-  f = NLPModels.obj(nlp, β)
-  NLPModels.grad!(nlp, β, g)
-
-  Δt = time() - start_time
-  tired = (Δt > max_time)
-  itermax = iter > max_eval
-
-  @info log_header([:iter, :f],
-                   [Int, Float64],
-                   hdr_override=Dict(:f => "f(β)"))
-
-  @info log_row(Any[iter, f])
-
-
-  P = if penalty == :l2
-    β -> β
-  elseif penalty == :l1
-    β -> sign.(β)
-  elseif penalty == :elasticnet
-    β -> ρ * β + (1 - ρ) * sign.(β)
-  end
-
-
-  for k = 1:max_eval
-
-    if learning_rate == :optimal
-      γ = 1 / (α * (1e3 + iter))
-    elseif learning_rate == :invscaling
-      γ = 1e-2 / (iter + 1)^power_t
-    end
-
-    βavg .= 0
-    for i in shuffle(1:n)
-      β -= γ * (α * P(β) + grad!(nlp, i, β, g))
-      βavg += β
-    end
-    βavg = βavg / n
-
-    Δt = time() - start_time
-    tired = Δt > max_time
-    solved = γ < 1e-2
-    iter += 1
-    itermax = iter > max_eval
-  end
-
-  status = if (iter == max_eval)
-    :first_order
-  elseif tired
-    if Δt >: max_time
-      :max_time
-    elseif itermax
-      :max_eval
-    end
-  else
-    :unknown
-  end
-
-  return GenericExecutionStats(status, nlp;
-    solution=β,
-    solver_specific=Dict(:βavg => βavg),
-    elapsed_time=Δt,
-    iter=iter
-  )
-end
+using LinearAlgebra, Random, SolverTools
+
+export sthocastic_gradient
+
+function sthocastic_gradient(
+  nlp::AbstractMultObjModel;
+  learning_rate::Symbol = :optimal,
+  α::Float64 = 1e-2, # penalty strength
+  ρ::Float64 = 0.85, # only used when penalty == :elasticnet
+  penalty::Symbol = :l2,
+  max_eval::Int = 0,
+  max_time::Float64 = 60.0,
+  max_iter::Int = 100,
+  atol::Float64 = 1e-8,
+  rtol::Float64 = 1e-8,
+  batch_size::Int = 1,
+  ν::Float64 = 0.5,
+  d::Float64 = 0.5, # decay factor for some learning-rate schedules
+  r::Int64 = 10, # iterations between declines of the :step_based schedule
+  γ0::Float64 = 1.0 # initial step length
+  )
+
+  iter = 0
+  start_time = time()
+  β = nlp.meta.x0
+  n = nlp.meta.nobj
+  p = nlp.meta.nvar
+  βavg = similar(β)
+  g = similar(β)
+
+  f = NLPModels.obj(nlp, β)
+  NLPModels.grad!(nlp, β, g)
+
+  Δt = time() - start_time
+
+  P = if penalty == :l2
+    β -> β
+  elseif penalty == :l1
+    β -> sign.(β)
+  elseif penalty == :elasticnet
+    β -> ρ * β + (1 - ρ) * sign.(β)
+  end
+
+  if learning_rate == :optimal && α == 0.0
+    @warn("Can't use learning_rate = :optimal with α = 0.0. 
Changing to learning_rate step_based.") + learning_rate = :step_based + end + + γ = if learning_rate == :optimal + iter -> γ0 / (α * (1e3 + iter)) + elseif learning_rate == :invscaling + iter -> γ0 / (iter + 1) ^ ν + elseif learning_rate == :time_based + iter -> γ0 / (1 + d * iter) + elseif learning_rate == :step_based + iter -> γ0 * d ^ div(1 + iter, r) + elseif learning_rate == :exponential + iter -> γ0 / exp(d * iter) + end + + status = :unknown + tired = Δt > max_time || sum_counters(nlp) > max_eval > 0 || iter > max_iter + small_step = γ(iter) < 1e-6 + + βavg .= 0 + num_batches = ceil(Int, n / batch_size) + + # betas =[] + # gamas = [] + while !(small_step || tired) + + gnorm = -1.0 + Rindex = shuffle(1:n) + for l = 1:num_batches + index = if l < num_batches + batch_size * (l - 1) .+ (1:batch_size) + else + batch_size * (l - 1) + 1:n + end + + grad!(nlp, Rindex[index], β, g) + β -= γ(iter) * (α * P(β) + g / length(index)) + βavg += β + end + + # append!(betas, β) + # append!(gamas,γ(iter)) + + βavg = βavg / num_batches + Δt = time() - start_time + iter += 1 + tired = Δt > max_time || sum_counters(nlp) > max_eval > 0 || iter > max_iter + small_step = γ(iter) < 1e-6 + end + + status = if small_step + :small_step + elseif tired + if Δt > max_time + :max_time + elseif sum_counters(nlp) > max_eval > 0 + :max_eval + elseif iter > max_iter + :max_iter + end + end + + # return iter, β, betas, gamas + + return GenericExecutionStats(status, nlp; + solution=β, + solver_specific=Dict(:βavg => βavg), + elapsed_time=Δt, + iter=iter + ) +end \ No newline at end of file
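
A minimal end-to-end sketch of the pieces added in this patch series, assuming it is applied on top of the earlier commits and that NLPModels, SolverTools, and ForwardDiff are available; the data, seed, and solver parameters below are illustrative only:

using MultObjNLPModels, NLPModels, Random

Random.seed!(0)
X = hcat(ones(20), rand(20))            # 20 samples: intercept plus one feature
y = X * [1.0, 2.0] .+ 0.1 .* randn(20)  # noisy linear response

nlp = LinearRegressionModel(X, y)       # one objective per sample
g = grad(nlp, [1, 2, 3], zeros(2))      # batch gradient over the first three samples

stats = sthocastic_gradient(nlp; learning_rate = :invscaling,
                            batch_size = 4, max_iter = 50)
println(stats.solution)                 # fitted coefficients β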