diff --git a/src/02_bandits/02_stochastic_bandit.jl b/src/02_bandits/02_stochastic_bandit.jl
index df940f3..4d5c27f 100644
--- a/src/02_bandits/02_stochastic_bandit.jl
+++ b/src/02_bandits/02_stochastic_bandit.jl
@@ -38,6 +38,14 @@ end
 Construct a new StochasticBandit object from a vector of probability
 distribution objects.
 """ ->
+function StochasticBandit{D <: UnivariateDistribution}(arms::Vector{D})
+    return StochasticBandit{D}(arms)
+end
+
+@doc """
+Construct a new StochasticBandit object from a vector of probability
+distribution objects and a time period integer.
+""" ->
 function StochasticBandit{D <: UnivariateDistribution}(arms::Vector{D}, t::Integer)
     return StochasticBandit{D}(arms)
 end
diff --git a/src/03_learners/02_mle_learner.jl b/src/03_learners/02_mle_learner.jl
index 2d87e4c..00e10ce 100644
--- a/src/03_learners/02_mle_learner.jl
+++ b/src/03_learners/02_mle_learner.jl
@@ -77,19 +77,26 @@ function learn!(
 
     if nᵢ == 1
         learner.oldMs[a] = r
-        learner.Ss[a] = 0.0
+        learner.Ss[a] = learner.σ₀
         learner.μs[a] = r
     else
         learner.newMs[a] = learner.oldMs[a] + (r - learner.oldMs[a]) / nᵢ
         learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a])
         learner.oldMs[a] = learner.newMs[a]
         learner.μs[a] = learner.newMs[a]
-        learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1))
+        learner.σs[a] = sqrt(learner.Ss[a] / (nᵢ - 1) / nᵢ)
     end
 
     return
 end
 
+@doc """
+Draw a sample from the posterior for arm a.
+""" ->
+function Base.rand(learner::MLELearner, a::Integer)
+    return rand(Normal(learner.μs[a], learner.σs[a]))
+end
+
 function Base.show(io::IO, learner::MLELearner)
     @printf(io, "MLELearner(%f, %f)", learner.μ₀, learner.σ₀)
 end
diff --git a/src/03_learners/06_james_stein_learner.jl b/src/03_learners/06_james_stein_learner.jl
new file mode 100644
index 0000000..86a4b9e
--- /dev/null
+++ b/src/03_learners/06_james_stein_learner.jl
@@ -0,0 +1,121 @@
+@doc """
+A JamesSteinLearner object stores the online estimated mean and variance of all
+arms. Arms with zero counts use a default mean and standard deviation.
+""" ->
+immutable JamesSteinLearner <: Learner
+    ns::Vector{Int64}
+    oldMs::Vector{Float64}
+    newMs::Vector{Float64}
+    Ss::Vector{Float64}
+    ys::Vector{Float64}
+    ss::Vector{Float64}
+    μs::Vector{Float64}
+    σs::Vector{Float64}
+    μ₀::Float64
+    σ₀::Float64
+    K::Int64
+end
+
+@doc """
+Create a JamesSteinLearner object specifying only a default mean and standard
+deviation.
+""" ->
+function JamesSteinLearner(μ₀::Real, σ₀::Real)
+    return JamesSteinLearner(
+        Array(Int64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Array(Float64, 0),
+        Float64(μ₀),
+        Float64(σ₀),
+        Int64(1)
+    )
+end
+
+@doc """
+Return the counts for each arm.
+""" ->
+counts(learner::JamesSteinLearner) = learner.ns
+
+@doc """
+Return the means for each arm.
+""" ->
+means(learner::JamesSteinLearner) = learner.μs
+
+@doc """
+Return the standard deviations for each arm.
+""" ->
+stds(learner::JamesSteinLearner) = learner.σs
+
+@doc """
+Reset the JamesSteinLearner object for K arms.
+""" ->
+function initialize!(learner::JamesSteinLearner, K::Integer)
+    resize!(learner.ns, K)
+    resize!(learner.oldMs, K)
+    resize!(learner.newMs, K)
+    resize!(learner.Ss, K)
+    resize!(learner.ys, K)
+    resize!(learner.ss, K)
+    resize!(learner.μs, K)
+    resize!(learner.σs, K)
+
+    fill!(learner.ns, 0)
+    fill!(learner.ys, learner.μ₀)
+    fill!(learner.μs, learner.μ₀)
+    fill!(learner.ss, learner.σ₀)
+    fill!(learner.σs, learner.σ₀)
+
+    return
+end
+
+@doc """
+Learn about arm a on trial t from reward r.
+""" ->
+function learn!(
+    learner::JamesSteinLearner,
+    context::Context,
+    a::Integer,
+    r::Real,
+)
+    learner.ns[a] += 1
+    nᵢ = learner.ns[a]
+
+    if nᵢ == 1
+        learner.oldMs[a] = r
+        learner.Ss[a] = learner.σ₀
+        learner.ys[a] = r
+        learner.μs[a] = r
+    else
+        learner.newMs[a] = learner.oldMs[a] + (r - learner.oldMs[a]) / nᵢ
+        learner.Ss[a] += (r - learner.oldMs[a]) * (r - learner.newMs[a])
+        learner.oldMs[a] = learner.newMs[a]
+        learner.ys[a] = learner.newMs[a]
+        learner.ss[a] = learner.Ss[a] / (nᵢ - 1) / nᵢ
+        y̅ = mean(learner.ys)
+        φs = min(1.0, learner.ss ./ (sumabs2(learner.ys - y̅) ./ (learner.K - 3)))
+        learner.μs .= y̅ .+ (1 .- φs) .* (learner.ys .- y̅)
+        learner.σs .= sqrt(
+            (1 .- φs) .* learner.ss .+
+            φs .* learner.ss ./ learner.K .+
+            2 .* φs.^2 .* (learner.ys .- y̅).^2 ./ (learner.K .- 3)
+        )
+    end
+
+    return
+end
+
+@doc """
+Draw a sample from the posterior for arm a.
+""" ->
+function Base.rand(learner::JamesSteinLearner, a::Integer)
+    return rand(Normal(learner.μs[a], learner.σs[a]))
+end
+
+function Base.show(io::IO, learner::JamesSteinLearner)
+    @printf(io, "JamesSteinLearner(%f, %f)", learner.μ₀, learner.σ₀)
+end
diff --git a/src/Bandits.jl b/src/Bandits.jl
index d356026..3d044e9 100644
--- a/src/Bandits.jl
+++ b/src/Bandits.jl
@@ -22,7 +22,7 @@ module Bandits
     include(joinpath("07_distributions", "03_nonstationary_multivariate_distribution.jl"))
     include(joinpath("07_distributions", "04_nonstationary_contextual_distribution.jl"))
     include(joinpath("07_distributions", "05_nonstationary_logistic_contextual_distribution.jl"))
-    include(joinpath("07_distributions", "06_nonstationary_gaussian_distribution.jl"))
+    include(joinpath("07_distributions", "06_nonstationary_gaussian_distribution.jl"))
     include(joinpath("07_distributions", "07_nonstationary_1dgaussianprocess_distribution.jl"))
 
     # Bandit
@@ -35,7 +35,7 @@ module Bandits
 
     # Learners
     export Learner, MLELearner, BetaLearner, BootstrapLearner,
-        BootstrapMLELearner, EBBetaLearner, DiscLearner
+        BootstrapMLELearner, EBBetaLearner, DiscLearner, JamesSteinLearner
    export initialize!, counts, means, stds, learn!, preferred_arm
     include(joinpath("03_learners", "01_learner.jl"))
     include(joinpath("03_learners", "02_mle_learner.jl"))
@@ -43,6 +43,7 @@ module Bandits
     include(joinpath("03_learners", "04_bootstrap_learner.jl"))
     include(joinpath("03_learners", "05_eb_beta_learner.jl"))
     include(joinpath("03_learners", "05_disc_learner.jl"))
+    include(joinpath("03_learners", "06_james_stein_learner.jl"))
 
     # Algorithms
     export