@@ -82,6 +82,7 @@ struct Options{T}
8282 initial_state:: Dict{Symbol, Float64}
8383 # The sampling scheme to use on the forward pass.
8484 sampling_scheme:: AbstractSamplingScheme
85+ backward_sampling_scheme:: AbstractBackwardSamplingScheme
8586 # Storage for the set of possible sampling states at each node. We only use
8687 # this if there is a cycle in the policy graph.
8788 starting_states:: Dict{T, Vector{Dict{Symbol, Float64}}}
@@ -100,12 +101,14 @@ struct Options{T}
100101 function Options (model:: PolicyGraph{T} ,
101102 initial_state:: Dict{Symbol, Float64} ,
102103 sampling_scheme:: AbstractSamplingScheme ,
104+ backward_sampling_scheme:: AbstractBackwardSamplingScheme ,
103105 risk_measures,
104106 cycle_discretization_delta:: Float64 ,
105107 refine_at_similar_nodes:: Bool ) where {T, S}
106108 return new {T} (
107109 initial_state,
108110 sampling_scheme,
111+ backward_sampling_scheme,
109112 to_nodal_form (model, x -> Dict{Symbol, Float64}[]),
110113 to_nodal_form (model, risk_measures),
111114 cycle_discretization_delta,
@@ -477,7 +480,9 @@ function backward_pass(
477480 belief == 0.0 && continue
478481 solve_all_children (
479482 model, model[node_index], items, belief, belief_state,
480- objective_state, outgoing_state, scenario_path[1 : index])
483+ objective_state, outgoing_state,
484+ options. backward_sampling_scheme,
485+ scenario_path[1 : index])
481486 end
482487 # We need to refine our estimate at all nodes in the partition.
483488 for node_index in model. belief_partition[partition_index]
@@ -501,7 +506,9 @@ function backward_pass(
501506 end
502507 solve_all_children (
503508 model, node, items, 1.0 , belief_state, objective_state,
504- outgoing_state, scenario_path[1 : index])
509+ outgoing_state, options. backward_sampling_scheme,
510+ scenario_path[1 : index]
511+ )
505512 refine_bellman_function (
506513 model, node, node. bellman_function,
507514 options. risk_measures[node_index], outgoing_state,
@@ -545,13 +552,19 @@ struct BackwardPassItems{T, U}
545552end
546553
547554function solve_all_children (
548- model:: PolicyGraph{T} , node:: Node{T} , items:: BackwardPassItems ,
549- belief:: Float64 , belief_state, objective_state,
550- outgoing_state:: Dict{Symbol, Float64} , scenario_path) where {T}
555+ model:: PolicyGraph{T} , node:: Node{T} , items:: BackwardPassItems ,
556+ belief:: Float64 , belief_state, objective_state,
557+ outgoing_state:: Dict{Symbol, Float64} ,
558+ backward_sampling_scheme:: AbstractBackwardSamplingScheme ,
559+ scenario_path
560+ ) where {T}
551561 length_scenario_path = length (scenario_path)
552562 for child in node. children
563+ if isapprox (child. probability, 0.0 , atol= 1e-6 )
564+ continue
565+ end
553566 child_node = model[child. term]
554- for noise in child_node. noise_terms
567+ for noise in sample_backward_noise_terms (backward_sampling_scheme, child_node)
555568 if length (scenario_path) == length_scenario_path
556569 push! (scenario_path, (child. term, noise. term))
557570 else
@@ -620,6 +633,9 @@ function calculate_bound(model::PolicyGraph{T},
620633
621634 # Solve all problems that are children of the root node.
622635 for child in model. root_children
636+ if isapprox (child. probability, 0.0 , atol= 1e-6 )
637+ continue
638+ end
623639 node = model[child. term]
624640 for noise in node. noise_terms
625641 if node. objective_state != = nothing
@@ -745,6 +761,9 @@ Train the policy for `model`. Keyword arguments:
745761 - `sampling_scheme`: a sampling scheme to use on the forward pass of the
746762 algorithm. Defaults to [`InSampleMonteCarlo`](@ref).
747763
764+ - `backward_sampling_scheme`: a backward pass sampling scheme to use on the
765+ backward pass of the algorithm. Defaults to `CompleteSampler`.
766+
748767 - `cut_type`: choose between `SDDP.SINGLE_CUT` and `SDDP.MULTI_CUT` versions of SDDP.
749768
750769 - `dashboard::Bool`: open a visualization of the training over time. Defaults
@@ -770,6 +789,7 @@ function train(
770789 cycle_discretization_delta:: Float64 = 0.0 ,
771790 refine_at_similar_nodes:: Bool = true ,
772791 cut_deletion_minimum:: Int = 1 ,
792+ backward_sampling_scheme:: AbstractBackwardSamplingScheme = SDDP. CompleteSampler (),
773793 dashboard:: Bool = false
774794)
775795 # Reset the TimerOutput.
@@ -812,6 +832,7 @@ function train(
812832 model,
813833 model. initial_root_state,
814834 sampling_scheme,
835+ backward_sampling_scheme,
815836 risk_measure,
816837 cycle_discretization_delta,
817838 refine_at_similar_nodes