Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/04_algorithms/01_algorithm.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ function learn!(
return
end

@doc """
Update the probability of choosing each arm. This is the abstract
fallback: every concrete Algorithm subtype must provide its own method,
so reaching this definition is always an error.
""" ->
function update_policy!(algorithm::Algorithm, context::Context)
    error("update_policy!(algorithm, context) is not implemented abstractly")
end

@doc """
Choose one of K arms given the current context.
""" ->
Expand Down
12 changes: 12 additions & 0 deletions src/04_algorithms/02_baseline/01_random_choice.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ uniformly at random.
""" ->
immutable RandomChoice{T <: Learner} <: Algorithm
    learner::T                # reward-tracking learner for the K arms
    policy::Vector{Float64}   # per-arm selection probabilities, set by update_policy!
end

@doc """
Assign equal probability mass 1/K to each of the K arms, so the next
arm is chosen uniformly at random.
""" ->
function update_policy!(algorithm::RandomChoice, context::Context)
    uniform_mass = 1 / context.K
    for arm in 1:context.K
        algorithm.policy[arm] = uniform_mass
    end
    return
end

@doc """
Expand Down
17 changes: 17 additions & 0 deletions src/04_algorithms/03_e-greedy/01_standard.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ An EpsilonGreedy object represents the standard, constant-ε bandit algorithm.
immutable EpsilonGreedy{T <: Learner} <: Algorithm
    learner::T                # reward-tracking learner for the K arms
    ε::Float64                # constant exploration probability
    policy::Vector{Float64}   # per-arm selection probabilities, set by update_policy!
end

@doc """
Recompute the selection probabilities: the currently preferred arm gets
mass (1 - ε) + ε/K and every other arm gets the exploration share ε/K.
""" ->
function update_policy!(algorithm::EpsilonGreedy, context::Context)
    # NOTE(review): K here is the policy length; other methods use
    # context.K — confirm these always agree.
    K = length(algorithm.policy)
    explore = algorithm.ε / K
    best = preferred_arm(algorithm, context)
    for arm in 1:K
        if arm == best
            algorithm.policy[arm] = (1 - algorithm.ε) + explore
        else
            algorithm.policy[arm] = explore
        end
    end
    return
end

@doc """
Expand Down
17 changes: 17 additions & 0 deletions src/04_algorithms/03_e-greedy/02_annealing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ algorithm in which ε decreases with a logarithmic annealing schedule.
""" ->
immutable AnnealingEpsilonGreedy{T <: Learner} <: Algorithm
    learner::T                # reward-tracking learner for the K arms
    policy::Vector{Float64}   # per-arm selection probabilities, set by update_policy!
end

@doc """
Recompute the selection probabilities with an annealed exploration rate
ε = 1 / log(e + t - 1), which shrinks as the round number t grows: the
preferred arm gets (1 - ε) + ε/K, every other arm gets ε/K.
""" ->
function update_policy!(algorithm::AnnealingEpsilonGreedy, context::Context)
    ε = 1 / log(e + context.t - 1)
    # NOTE(review): K here is the policy length; other methods use
    # context.K — confirm these always agree.
    K = length(algorithm.policy)
    explore = ε / K
    best = preferred_arm(algorithm, context)
    for arm in 1:K
        algorithm.policy[arm] = arm == best ? (1 - ε) + explore : explore
    end
    return
end

@doc """
Expand Down
18 changes: 18 additions & 0 deletions src/04_algorithms/03_e-greedy/03_decreasing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@ immutable DecreasingEpsilonGreedy{L <: Learner} <: Algorithm
learner::L
c::Float64
d::Float64
policy::Vector{Float64}
end

@doc """
Recompute the selection probabilities with the decreasing schedule
ε_t = min(1, c·K / (d²·t)): the preferred arm gets (1 - ε) + ε/K and
every other arm gets ε/K.
""" ->
function update_policy!(algorithm::DecreasingEpsilonGreedy, context::Context)
    ε = min(1.0, (algorithm.c * context.K) / (algorithm.d^2 * context.t))
    # NOTE(review): ε is computed from context.K but the loop runs over the
    # policy length — confirm these always agree.
    K = length(algorithm.policy)
    explore = ε / K
    best = preferred_arm(algorithm, context)
    for arm in 1:K
        algorithm.policy[arm] = arm == best ? (1 - ε) + explore : explore
    end
    return
end

@doc """
Expand Down
13 changes: 13 additions & 0 deletions src/04_algorithms/04_softmax/01_standard.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,19 @@ function initialize!(algorithm::Softmax, K::Integer)
return
end

@doc """
Recompute the softmax selection probabilities from the empirical means
scaled by the constant temperature τ.
""" ->
function update_policy!(algorithm::Softmax, context::Context)
    estimates = means(algorithm.learner)
    for arm in 1:context.K
        algorithm.tmeans[arm] = estimates[arm] / algorithm.τ
    end
    softmax!(algorithm.probs, algorithm.tmeans)
    return
end

@doc """
Select an arm according to the softmax rule. Recompute temperature adjusted
means to make sure that the softmax selection probabilities are correct.
Expand Down
13 changes: 13 additions & 0 deletions src/04_algorithms/04_softmax/02_annealing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ function initialize!(algorithm::AnnealingSoftmax, K::Integer)
return
end

@doc """
Recompute the softmax selection probabilities using an annealed
temperature τ = τ₀ / log(e + t - 1), which sharpens the policy as the
round number t grows.
""" ->
function update_policy!(algorithm::AnnealingSoftmax, context::Context)
    estimates = means(algorithm.learner)
    temperature = algorithm.τ₀ / log(e + context.t - 1)
    for arm in 1:context.K
        algorithm.tmeans[arm] = estimates[arm] / temperature
    end
    softmax!(algorithm.probs, algorithm.tmeans)
    return
end

@doc """
Select an arm according to the softmax rule. First, the current temperature
is computed. Then we recompute temperature adjusted means to make sure that the
Expand Down
42 changes: 42 additions & 0 deletions src/04_algorithms/05_ucb/01_ucb1.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,54 @@
immutable UCB1{T <: Learner} <: Algorithm
    learner::T                # tracks empirical means and pull counts per arm
    policy::Vector{Float64}   # per-arm selection probabilities, set by update_policy!
end

# Reset the wrapped learner's state for a K-armed problem.
# NOTE(review): the policy vector is not resized here — confirm it is
# allocated with length K elsewhere.
function initialize!(algorithm::UCB1, K::Integer)
    initialize!(algorithm.learner, K)
    return
end

@doc """
Update the policy to be a point mass on the arm UCB1 selects: any arm
that has never been pulled is chosen first; otherwise the arm with the
highest upper confidence bound, μ[a] + sqrt(2 log(t) / n[a]), is chosen.
""" ->
function update_policy!(algorithm::UCB1, context::Context)
    μs = means(algorithm.learner)
    ns = counts(algorithm.learner)

    # Any arm never pulled so far is chosen deterministically before the
    # confidence bounds are consulted (its bonus would be infinite).
    for a in 1:context.K
        if ns[a] == 0
            # BUG FIX: this loop previously read `1:K` with `K` undefined
            # in this scope, throwing UndefVarError on the first round.
            for i in 1:context.K
                algorithm.policy[i] = (i == a) ? 1.0 : 0.0
            end
            return
        end
    end

    # Otherwise pick the arm maximizing the UCB1 score.
    max_score, chosen_a = -Inf, 0
    for a in 1:context.K
        bonus = sqrt(2 * log(context.t) / ns[a])
        score = μs[a] + bonus
        if score > max_score
            max_score, chosen_a = score, a
        end
    end

    # UCB1 is deterministic: all probability mass on the chosen arm.
    for a in 1:context.K
        algorithm.policy[a] = (a == chosen_a) ? 1.0 : 0.0
    end

    return
end

function choose_arm(algorithm::UCB1, context::Context)
μs = means(algorithm.learner)
ns = counts(algorithm.learner)
Expand Down