From 1967c2375518a759887ae1687bcb4c590dc09e4f Mon Sep 17 00:00:00 2001 From: Drew Dimmery Date: Mon, 24 Apr 2017 22:20:13 -0700 Subject: [PATCH 1/5] Add Positive Part James-Stein Learner This adds a positive part James Stein estimator that isn't dependent on distribution of the outcome. --- src/03_learners/06_eb_mle_learner.jl | 109 +++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/03_learners/06_eb_mle_learner.jl diff --git a/src/03_learners/06_eb_mle_learner.jl b/src/03_learners/06_eb_mle_learner.jl new file mode 100644 index 0000000..ac0cb1f --- /dev/null +++ b/src/03_learners/06_eb_mle_learner.jl @@ -0,0 +1,109 @@ +@doc """ +A EBMLELearner object stores the online estimated mean and variance of all +arms. Arms with zero counts use a default mean and standard deviation. +""" -> +immutable EBMLELearner <: Learner + ns::Vector{Int64} + oldMs::Vector{Float64} + newMs::Vector{Float64} + Ss::Vector{Float64} + μs::Vector{Float64} + σs::Vector{Float64} + μ₀::Float64 + σ₀::Float64 + K::Int64 +end + +@doc """ +Create an EBMLELearner object specifying only a default mean and standard +deviation. +""" -> +function EBMLELearner(μ₀::Real, σ₀::Real) + return EBMLELearner( + Array(Int64, 0), + Array(Float64, 0), + Array(Float64, 0), + Array(Float64, 0), + Array(Float64, 0), + Array(Float64, 0), + Float64(μ₀), + Float64(σ₀), + Int64(1) + ) +end + +@doc """ +Return the counts for each arm. +""" -> +counts(learner::EBMLELearner) = learner.ns + +@doc """ +Return the means for each arm. +""" -> +means(learner::EBMLELearner) = learner.μ̃s + +@doc """ +Return the standard deviations for each arm. +""" -> +stds(learner::EBMLELearner) = learner.σ̃s + +@doc """ +Reset the EBMLELearner object for K arms. +""" -> +function initialize!(learner::EBMLELearner, K::Integer) + learner.K = K + resize!(learner.ns, K) + resize!(learner.oldMs, K) + resize!(learner.newMs, K) + resize!(learner.Ss, K) + resize!(learner.μs, K) + resize!(learner.σs, K) + + fill!(learner.ns, 0) + fill!(learner.μs, learner.μ₀) + fill!(learner.μ̃s, learner.μ₀) + fill!(learner.σs, learner.σ₀) + fill!(learner.σ̃s, learner.σ̃₀) + + return +end + +@doc """ +Learn about arm a on trial t from reward r. +""" -> +function learn!( + learner::EBMLELearner, + context::Context, + a::Integer, + r::Real, +) + learner.ns[a] += 1 + nᵢ = learner.ns[a] + + if nᵢ == 1 + learner.oldMs[a] = r + learner.Ss[a] = 0.0 + learner.μs[a] = r + learner.μ̃s[a] = r + else + learner.newMs[a] = learner.oldMs[a] + (r - learner.oldMs[a]) / nᵢ + learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a]) + learner.oldMs[a] = learner.newMs[a] + learner.μs[a] = learner.newMs[a] + learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1)) + y̅ = mean(learner.μs) + φs = min(1.0, learner.σs / (sumabs2(learner.μs - y̅) / (learner.K - 3))) + learner.μ̃s = learner.μs + φs .* (y̅ - learner.μs) + learner.σ̃s = sqrt( + (1 - φs) .* learner.σs + + learner.σs / learner.K + + 2 .* φs .* (learner.μs - y̅).^2 / (learner.K - 3) + ) + end + + return +end + +function Base.show(io::IO, learner::EBMLELearner) + @printf(io, "EBMLELearner(%f, %f)", learner.μ₀, learner.σ₀) +end From 22f1fe92b27d3308ab527d3fed5f39dc7cff7b74 Mon Sep 17 00:00:00 2001 From: Drew Dimmery Date: Mon, 24 Apr 2017 23:09:53 -0700 Subject: [PATCH 2/5] Rename to JamesSteinLearner Rename from a dumb name to JamesSteinLearner. Also add in fix to the stochastic bandit because it wasn't working before. --- src/02_bandits/02_stochastic_bandit.jl | 8 ++++++ ...e_learner.jl => 06_james_stein_learner.jl} | 26 +++++++++---------- src/Bandits.jl | 5 ++-- 3 files changed, 24 insertions(+), 15 deletions(-) rename src/03_learners/{06_eb_mle_learner.jl => 06_james_stein_learner.jl} (75%) diff --git a/src/02_bandits/02_stochastic_bandit.jl b/src/02_bandits/02_stochastic_bandit.jl index df940f3..4d5c27f 100644 --- a/src/02_bandits/02_stochastic_bandit.jl +++ b/src/02_bandits/02_stochastic_bandit.jl @@ -38,6 +38,14 @@ end Construct a new StochasticBandit object from a vector of probability distribution objects. """ -> +function StochasticBandit{D <: UnivariateDistribution}(arms::Vector{D}) + return StochasticBandit{D}(arms) +end + +@doc """ +Construct a new StochasticBandit object from a vector of probability +distribution objects and a time period integer. +""" -> function StochasticBandit{D <: UnivariateDistribution}(arms::Vector{D}, t::Integer) return StochasticBandit{D}(arms) end diff --git a/src/03_learners/06_eb_mle_learner.jl b/src/03_learners/06_james_stein_learner.jl similarity index 75% rename from src/03_learners/06_eb_mle_learner.jl rename to src/03_learners/06_james_stein_learner.jl index ac0cb1f..ca08fda 100644 --- a/src/03_learners/06_eb_mle_learner.jl +++ b/src/03_learners/06_james_stein_learner.jl @@ -1,8 +1,8 @@ @doc """ -A EBMLELearner object stores the online estimated mean and variance of all +A JamesSteinLearner object stores the online estimated mean and variance of all arms. Arms with zero counts use a default mean and standard deviation. """ -> -immutable EBMLELearner <: Learner +immutable JamesSteinLearner <: Learner ns::Vector{Int64} oldMs::Vector{Float64} newMs::Vector{Float64} @@ -15,11 +15,11 @@ immutable EBMLELearner <: Learner end @doc """ -Create an EBMLELearner object specifying only a default mean and standard +Create an JamesSteinLearner object specifying only a default mean and standard deviation. """ -> -function EBMLELearner(μ₀::Real, σ₀::Real) - return EBMLELearner( +function JamesSteinLearner(μ₀::Real, σ₀::Real) + return JamesSteinLearner( Array(Int64, 0), Array(Float64, 0), Array(Float64, 0), @@ -35,22 +35,22 @@ end @doc """ Return the counts for each arm. """ -> -counts(learner::EBMLELearner) = learner.ns +counts(learner::JamesSteinLearner) = learner.ns @doc """ Return the means for each arm. """ -> -means(learner::EBMLELearner) = learner.μ̃s +means(learner::JamesSteinLearner) = learner.μ̃s @doc """ Return the standard deviations for each arm. """ -> -stds(learner::EBMLELearner) = learner.σ̃s +stds(learner::JamesSteinLearner) = learner.σ̃s @doc """ -Reset the EBMLELearner object for K arms. +Reset the JamesSteinLearner object for K arms. """ -> -function initialize!(learner::EBMLELearner, K::Integer) +function initialize!(learner::JamesSteinLearner, K::Integer) learner.K = K resize!(learner.ns, K) resize!(learner.oldMs, K) @@ -72,7 +72,7 @@ end Learn about arm a on trial t from reward r. """ -> function learn!( - learner::EBMLELearner, + learner::JamesSteinLearner, context::Context, a::Integer, r::Real, @@ -104,6 +104,6 @@ function learn!( return end -function Base.show(io::IO, learner::EBMLELearner) - @printf(io, "EBMLELearner(%f, %f)", learner.μ₀, learner.σ₀) +function Base.show(io::IO, learner::JamesSteinLearner) + @printf(io, "JamesSteinLearner(%f, %f)", learner.μ₀, learner.σ₀) end diff --git a/src/Bandits.jl b/src/Bandits.jl index d356026..3d044e9 100644 --- a/src/Bandits.jl +++ b/src/Bandits.jl @@ -22,7 +22,7 @@ module Bandits include(joinpath("07_distributions", "03_nonstationary_multivariate_distribution.jl")) include(joinpath("07_distributions", "04_nonstationary_contextual_distribution.jl")) include(joinpath("07_distributions", "05_nonstationary_logistic_contextual_distribution.jl")) - include(joinpath("07_distributions", "06_nonstationary_gaussian_distribution.jl")) + include(joinpath("07_distributions", "06_nonstationary_gaussian_distribution.jl")) include(joinpath("07_distributions", "07_nonstationary_1dgaussianprocess_distribution.jl")) # Bandit @@ -35,7 +35,7 @@ module Bandits # Learners export Learner, MLELearner, BetaLearner, BootstrapLearner, - BootstrapMLELearner, EBBetaLearner, DiscLearner + BootstrapMLELearner, EBBetaLearner, DiscLearner, JamesSteinLearner export initialize!, counts, means, stds, learn!, preferred_arm include(joinpath("03_learners", "01_learner.jl")) include(joinpath("03_learners", "02_mle_learner.jl")) @@ -43,6 +43,7 @@ module Bandits include(joinpath("03_learners", "04_bootstrap_learner.jl")) include(joinpath("03_learners", "05_eb_beta_learner.jl")) include(joinpath("03_learners", "05_disc_learner.jl")) + include(joinpath("03_learners", "06_james_stein_learner.jl")) # Algorithms export From de2e9aa1316086fc64ab768097f5887d0f5bd874 Mon Sep 17 00:00:00 2001 From: Drew Dimmery Date: Wed, 26 Apr 2017 10:46:09 -0700 Subject: [PATCH 3/5] Add rand functions for MLE/James-Stein Learners This provides functions for generating samples from the (normal) posteriors of MLELearners and James-Stein learners. --- src/03_learners/02_mle_learner.jl | 12 ++++++ src/03_learners/06_james_stein_learner.jl | 47 +++++++++++++++-------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/03_learners/02_mle_learner.jl b/src/03_learners/02_mle_learner.jl index 2d87e4c..a884881 100644 --- a/src/03_learners/02_mle_learner.jl +++ b/src/03_learners/02_mle_learner.jl @@ -90,6 +90,18 @@ function learn!( return end +@doc """ +Draw a sample from the posterior for arm a. +""" -> +function Base.rand(learner::MLELearner, a::Integer) + o = try + rand(Normal(learner.μs[a], σ)) + catch + rand(Normal(learner.μs[a],learner.σ₀)) + end + return o +end + function Base.show(io::IO, learner::MLELearner) @printf(io, "MLELearner(%f, %f)", learner.μ₀, learner.σ₀) end diff --git a/src/03_learners/06_james_stein_learner.jl b/src/03_learners/06_james_stein_learner.jl index ca08fda..4cedc39 100644 --- a/src/03_learners/06_james_stein_learner.jl +++ b/src/03_learners/06_james_stein_learner.jl @@ -7,6 +7,8 @@ immutable JamesSteinLearner <: Learner oldMs::Vector{Float64} newMs::Vector{Float64} Ss::Vector{Float64} + ys::Vector{Float64} + ss::Vector{Float64} μs::Vector{Float64} σs::Vector{Float64} μ₀::Float64 @@ -26,6 +28,8 @@ function JamesSteinLearner(μ₀::Real, σ₀::Real) Array(Float64, 0), Array(Float64, 0), Array(Float64, 0), + Array(Float64, 0), + Array(Float64, 0), Float64(μ₀), Float64(σ₀), Int64(1) @@ -40,30 +44,31 @@ counts(learner::JamesSteinLearner) = learner.ns @doc """ Return the means for each arm. """ -> -means(learner::JamesSteinLearner) = learner.μ̃s +means(learner::JamesSteinLearner) = learner.μs @doc """ Return the standard deviations for each arm. """ -> -stds(learner::JamesSteinLearner) = learner.σ̃s +stds(learner::JamesSteinLearner) = learner.σs @doc """ Reset the JamesSteinLearner object for K arms. """ -> function initialize!(learner::JamesSteinLearner, K::Integer) - learner.K = K resize!(learner.ns, K) resize!(learner.oldMs, K) resize!(learner.newMs, K) resize!(learner.Ss, K) + resize!(learner.ys, K) + resize!(learner.ss, K) resize!(learner.μs, K) resize!(learner.σs, K) fill!(learner.ns, 0) + fill!(learner.ys, learner.μ₀) fill!(learner.μs, learner.μ₀) - fill!(learner.μ̃s, learner.μ₀) + fill!(learner.ss, learner.σ₀) fill!(learner.σs, learner.σ₀) - fill!(learner.σ̃s, learner.σ̃₀) return end @@ -83,27 +88,39 @@ function learn!( if nᵢ == 1 learner.oldMs[a] = r learner.Ss[a] = 0.0 + learner.ys[a] = r learner.μs[a] = r - learner.μ̃s[a] = r else learner.newMs[a] = learner.oldMs[a] + (r - learner.oldMs[a]) / nᵢ learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a]) learner.oldMs[a] = learner.newMs[a] - learner.μs[a] = learner.newMs[a] - learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1)) - y̅ = mean(learner.μs) - φs = min(1.0, learner.σs / (sumabs2(learner.μs - y̅) / (learner.K - 3))) - learner.μ̃s = learner.μs + φs .* (y̅ - learner.μs) - learner.σ̃s = sqrt( - (1 - φs) .* learner.σs + - learner.σs / learner.K + - 2 .* φs .* (learner.μs - y̅).^2 / (learner.K - 3) + learner.ys[a] = learner.newMs[a] + learner.ss[a] = sqrt(learner.Ss[a] / (nᵢ - 1)) + y̅ = mean(learner.ys) + φs = max(0.0, min(1.0, learner.ss ./ (sumabs2(learner.ys - y̅) ./ (learner.K - 3)))) + learner.μs[:] = learner.ys + φs .* (y̅ - learner.ys) + learner.σs[:] = sqrt( + (1 - φs) .* learner.ss + + learner.ss ./ learner.K + + 2 .* φs .* (learner.ys - y̅).^2 ./ (learner.K - 3) ) end return end +@doc """ +Draw a sample from the posterior for arm a. +""" -> +function Base.rand(learner::JamesSteinLearner, a::Integer) + o = try + rand(Normal(learner.μs[a], learner.σs[a])) + catch + rand(Normal(learner.μs[a],learner.σ₀)) + end + return o +end + function Base.show(io::IO, learner::JamesSteinLearner) @printf(io, "JamesSteinLearner(%f, %f)", learner.μ₀, learner.σ₀) end From 76dfe71bba13ebb4f02fe4491e0712f06b334806 Mon Sep 17 00:00:00 2001 From: Drew Dimmery Date: Wed, 31 May 2017 11:04:52 -0700 Subject: [PATCH 4/5] Fix calculation of std error MLELearner was calculating SD, but want to calculate SE --- src/03_learners/02_mle_learner.jl | 11 +++-------- src/03_learners/06_james_stein_learner.jl | 21 ++++++++------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/03_learners/02_mle_learner.jl b/src/03_learners/02_mle_learner.jl index a884881..00e10ce 100644 --- a/src/03_learners/02_mle_learner.jl +++ b/src/03_learners/02_mle_learner.jl @@ -77,14 +77,14 @@ function learn!( if nᵢ == 1 learner.oldMs[a] = r - learner.Ss[a] = 0.0 + learner.Ss[a] = learner.σ₀ learner.μs[a] = r else learner.newMs[a] = learner.oldMs[a] + (r - learner.oldMs[a]) / nᵢ learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a]) learner.oldMs[a] = learner.newMs[a] learner.μs[a] = learner.newMs[a] - learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1)) + learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1) / nᵢ) end return @@ -94,12 +94,7 @@ end Draw a sample from the posterior for arm a. """ -> function Base.rand(learner::MLELearner, a::Integer) - o = try - rand(Normal(learner.μs[a], σ)) - catch - rand(Normal(learner.μs[a],learner.σ₀)) - end - return o + return rand(Normal(learner.μs[a], learner.σs[a])) end function Base.show(io::IO, learner::MLELearner) diff --git a/src/03_learners/06_james_stein_learner.jl b/src/03_learners/06_james_stein_learner.jl index 4cedc39..a12705f 100644 --- a/src/03_learners/06_james_stein_learner.jl +++ b/src/03_learners/06_james_stein_learner.jl @@ -87,7 +87,7 @@ function learn!( if nᵢ == 1 learner.oldMs[a] = r - learner.Ss[a] = 0.0 + learner.Ss[a] = learner.σ₀ learner.ys[a] = r learner.μs[a] = r else @@ -95,14 +95,14 @@ function learn!( learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a]) learner.oldMs[a] = learner.newMs[a] learner.ys[a] = learner.newMs[a] - learner.ss[a] = sqrt(learner.Ss[a] / (nᵢ - 1)) + learner.ss[a] = learner.Ss[a] / (nᵢ - 1) / nᵢ y̅ = mean(learner.ys) - φs = max(0.0, min(1.0, learner.ss ./ (sumabs2(learner.ys - y̅) ./ (learner.K - 3)))) - learner.μs[:] = learner.ys + φs .* (y̅ - learner.ys) + φs = min(1.0, learner.ss ./ (sumabs2(learner.ys - y̅) ./ (learner.K - 3))) + learner.μs[:] = y̅ + (1 - φs) .* (learner.ys - y̅) learner.σs[:] = sqrt( - (1 - φs) .* learner.ss + - learner.ss ./ learner.K + - 2 .* φs .* (learner.ys - y̅).^2 ./ (learner.K - 3) + (1 - φs) .* learner.ss + + φs .* learner.ss ./ learner.K + + 2 .* φs.^2 .* (learner.ys - y̅).^2 ./ (learner.K - 3) ) end @@ -113,12 +113,7 @@ end Draw a sample from the posterior for arm a. """ -> function Base.rand(learner::JamesSteinLearner, a::Integer) - o = try - rand(Normal(learner.μs[a], learner.σs[a])) - catch - rand(Normal(learner.μs[a],learner.σ₀)) - end - return o + return rand(Normal(learner.μs[a], learner.σs[a])) end function Base.show(io::IO, learner::JamesSteinLearner) From 6b856f240cb5b717ffd26bdf1639535cab71d989 Mon Sep 17 00:00:00 2001 From: Drew Dimmery Date: Sun, 2 Jul 2017 11:30:39 -0700 Subject: [PATCH 5/5] Fixing variance calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't address the concern over learner.σs containing the std dev rather than the std error, but it fixes a problem with the underlying calculation --- src/03_learners/06_james_stein_learner.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/03_learners/06_james_stein_learner.jl b/src/03_learners/06_james_stein_learner.jl index a12705f..86a4b9e 100644 --- a/src/03_learners/06_james_stein_learner.jl +++ b/src/03_learners/06_james_stein_learner.jl @@ -98,11 +98,11 @@ function learn!( learner.ss[a] = learner.Ss[a] / (nᵢ - 1) / nᵢ y̅ = mean(learner.ys) φs = min(1.0, learner.ss ./ (sumabs2(learner.ys - y̅) ./ (learner.K - 3))) - learner.μs[:] = y̅ + (1 - φs) .* (learner.ys - y̅) - learner.σs[:] = sqrt( - (1 - φs) .* learner.ss + - φs .* learner.ss ./ learner.K + - 2 .* φs.^2 .* (learner.ys - y̅).^2 ./ (learner.K - 3) + learner.μs .= y̅ .+ (1 .- φs) .* (learner.ys .- y̅) + learner.σs .= sqrt( + (1 .- φs) .* learner.ss .+ + φs .* learner.ss ./ learner.K .+ + 2 .* φs.^2 .* (learner.ys .- y̅).^2 ./ (learner.K .- 3) ) end