diff --git a/src/04_algorithms/01_algorithm.jl b/src/04_algorithms/01_algorithm.jl index 16ba72f..ee91c48 100644 --- a/src/04_algorithms/01_algorithm.jl +++ b/src/04_algorithms/01_algorithm.jl @@ -30,6 +30,13 @@ function learn!( return end +@doc """ +Update the probability of choosing each arm. +""" -> +function update_policy!(algorithm::Algorithm, context::Context) + error("update_policy!(algorithm, context) is not implemented abstractly") +end + @doc """ Choose one of K arms given the current context. """ -> diff --git a/src/04_algorithms/02_baseline/01_random_choice.jl b/src/04_algorithms/02_baseline/01_random_choice.jl index 28b6acf..051388d 100644 --- a/src/04_algorithms/02_baseline/01_random_choice.jl +++ b/src/04_algorithms/02_baseline/01_random_choice.jl @@ -4,6 +4,18 @@ uniformly at random. """ -> immutable RandomChoice{L <: Learner} <: Algorithm learner::L + policy::Vector{Float64} +end + +@doc """ +Prepare to choose an arm uniformly at random. +""" -> +function update_policy!(algorithm::RandomChoice, context::Context) + K = context.K + for i in 1:K + algorithm.policy[i] = 1 / K + end + return end @doc """ diff --git a/src/04_algorithms/03_e-greedy/01_standard.jl b/src/04_algorithms/03_e-greedy/01_standard.jl index a49af8c..a5e43d3 100644 --- a/src/04_algorithms/03_e-greedy/01_standard.jl +++ b/src/04_algorithms/03_e-greedy/01_standard.jl @@ -4,6 +4,23 @@ An EpsilonGreedy object represents the standard, constant-ε bandit algorithm. immutable EpsilonGreedy{L <: Learner} <: Algorithm learner::L ε::Float64 + policy::Vector{Float64} +end + +@doc """ +Update policy based on belief about best arm. 
+""" -> +function update_policy!(algorithm::EpsilonGreedy, context::Context) + ε, K = algorithm.ε, length(algorithm.policy) + a_star = preferred_arm(algorithm, context) + for i in 1:K + if i != a_star + algorithm.policy[i] = ε / K + else + algorithm.policy[i] = (1 - ε) + ε / K + end + end + return end @doc """ diff --git a/src/04_algorithms/03_e-greedy/02_annealing.jl b/src/04_algorithms/03_e-greedy/02_annealing.jl index 4032578..717fcc7 100644 --- a/src/04_algorithms/03_e-greedy/02_annealing.jl +++ b/src/04_algorithms/03_e-greedy/02_annealing.jl @@ -4,6 +4,23 @@ algorithm in which ε decreases with a logarithmic annealing schedule. """ -> immutable AnnealingEpsilonGreedy{L <: Learner} <: Algorithm learner::L + policy::Vector{Float64} +end + +@doc """ +Update policy based on belief about best arm. +""" -> +function update_policy!(algorithm::AnnealingEpsilonGreedy, context::Context) + ε, K = 1 / log(e + context.t - 1), length(algorithm.policy) + a_star = preferred_arm(algorithm, context) + for i in 1:K + if i != a_star + algorithm.policy[i] = ε / K + else + algorithm.policy[i] = (1 - ε) + ε / K + end + end + return end @doc """ diff --git a/src/04_algorithms/03_e-greedy/03_decreasing.jl b/src/04_algorithms/03_e-greedy/03_decreasing.jl index c078ceb..96d46e8 100644 --- a/src/04_algorithms/03_e-greedy/03_decreasing.jl +++ b/src/04_algorithms/03_e-greedy/03_decreasing.jl @@ -6,6 +6,24 @@ immutable DecreasingEpsilonGreedy{L <: Learner} <: Algorithm learner::L c::Float64 d::Float64 + policy::Vector{Float64} +end + +@doc """ +Update policy based on belief about best arm. 
+""" -> +function update_policy!(algorithm::DecreasingEpsilonGreedy, context::Context) + ε = min(1.0, (algorithm.c * context.K) / (algorithm.d^2 * context.t)) + K = length(algorithm.policy) + a_star = preferred_arm(algorithm, context) + for i in 1:K + if i != a_star + algorithm.policy[i] = ε / K + else + algorithm.policy[i] = (1 - ε) + ε / K + end + end + return end @doc """ diff --git a/src/04_algorithms/04_softmax/01_standard.jl b/src/04_algorithms/04_softmax/01_standard.jl index 9172653..6d5db59 100644 --- a/src/04_algorithms/04_softmax/01_standard.jl +++ b/src/04_algorithms/04_softmax/01_standard.jl @@ -31,6 +31,19 @@ function initialize!(algorithm::Softmax, K::Integer) return end +@doc """ +Update policy based on empirical means and temperature. +""" -> +function update_policy!(algorithm::Softmax, context::Context) + μs = means(algorithm.learner) + τ = algorithm.τ + for i in 1:context.K + algorithm.tmeans[i] = μs[i] / τ + end + softmax!(algorithm.probs, algorithm.tmeans) + return +end + @doc """ Select an arm according to the softmax rule. Recompute temperature adjusted means to make sure that the softmax selection probabilities are correct. diff --git a/src/04_algorithms/04_softmax/02_annealing.jl b/src/04_algorithms/04_softmax/02_annealing.jl index 3c170b9..c7cdd87 100644 --- a/src/04_algorithms/04_softmax/02_annealing.jl +++ b/src/04_algorithms/04_softmax/02_annealing.jl @@ -33,6 +33,19 @@ function initialize!(algorithm::AnnealingSoftmax, K::Integer) return end +@doc """ +Update policy based on empirical means and temperature. +""" -> +function update_policy!(algorithm::AnnealingSoftmax, context::Context) + μs = means(algorithm.learner) + τ = algorithm.τ₀ / log(e + context.t - 1) + for i in 1:context.K + algorithm.tmeans[i] = μs[i] / τ + end + softmax!(algorithm.probs, algorithm.tmeans) + return +end + @doc """ Select an arm according to the softmax rule. First, the current temperature is computed. 
Then we recompute temperature adjusted means to make sure that the diff --git a/src/04_algorithms/05_ucb/01_ucb1.jl b/src/04_algorithms/05_ucb/01_ucb1.jl index 2886954..a0a5871 100644 --- a/src/04_algorithms/05_ucb/01_ucb1.jl +++ b/src/04_algorithms/05_ucb/01_ucb1.jl @@ -1,5 +1,6 @@ immutable UCB1{L <: Learner} <: Algorithm learner::L + policy::Vector{Float64} end function initialize!(algorithm::UCB1, K::Integer) @@ -7,6 +8,47 @@ function initialize!(algorithm::UCB1, K::Integer) return end +@doc """ +Update policy based on empirical means and upper confidence bounds. +""" -> +function update_policy!(algorithm::UCB1, context::Context) + μs = means(algorithm.learner) + ns = counts(algorithm.learner) + + for a in 1:context.K + if ns[a] == 0 + for i in 1:context.K + if a == i + algorithm.policy[i] = 1.0 + else + algorithm.policy[i] = 0.0 + end + end + return + end + end + + max_score, chosen_a = -Inf, 0 + for a in 1:context.K + bonus = sqrt(2 * log(context.t) / ns[a]) + score = μs[a] + bonus + if score > max_score + max_score = score + chosen_a = a + end + end + + for a in 1:context.K + if chosen_a == a + algorithm.policy[a] = 1.0 + else + algorithm.policy[a] = 0.0 + end + end + + return +end + function choose_arm(algorithm::UCB1, context::Context) μs = means(algorithm.learner) ns = counts(algorithm.learner)