Fix RLDatasets.jl documentation (#467)

Mobius1D · web-flow · commit b29c9f01240d · 2021-08-17T12:54:13.000+08:00
* Refine documentation in RLDatasets.jl

* update docs in missed out files

* fix link provided

* Update atari_dataset.jl

* Update d4rl_dataset.jl

* Update d4rl_dataset.jl

* fix typo

* Fix type error

* fix type error

* update readme
diff --git a/src/ReinforcementLearningDatasets/README.md b/src/ReinforcementLearningDatasets/README.md
@@ -14,14 +14,14 @@ pkg> add https://github.com/JuliaReinforcementLearning/ReinforcementLearning.jl:
 ```julia
 using ReinforcementLearningDatasets
 ds = dataset("hopper-medium-replay-v0"; repo="d4rl")
-samples = Iterators.take!(ds)
+samples = Iterators.take(ds, 2)
 ```
 `ds` is of the type `D4RLDataset` which consists of the entire dataset along with some other information about the dataset. `samples` are in the form of `SARTS` with batch_size 256.
 #### RL Unplugged
 ```julia
 using ReinforcementLearningDatasets
 ds = rl_unplugged_atari_dataset("pong", 1, [1, 2])
-samples = Iterators.take!(ds, 2)
+samples = take!(ds, 2)
 ```
 `ds` is a `Channel{RLTransition}` that returns batches of type `RLTransition` when `take!` is used.
 
diff --git a/src/ReinforcementLearningDatasets/src/atari/atari_dataset.jl b/src/ReinforcementLearningDatasets/src/atari/atari_dataset.jl
@@ -2,24 +2,26 @@ using NPZ
 using CodecZlib
 
 """
-Represents an iterable dataset of type AtariDataSet with the following fields:
-
-`dataset`: Dict{Symbol, Any}, representation of the dataset as a Dictionary with style as `style`
-`epochs`: Vector{Int}, list of epochs to load
-`repo`: String, the repository from which the dataset is taken
-`length`: Integer, the length of the dataset
-`batch_size`: Integer, the size of the batches returned by `iterate`.
-`style`: Tuple, the type of the NamedTuple, for now SARTS and SART is supported.
-`rng`<: AbstractRNG.
-`meta`: Dict, the metadata provided along with the dataset
-`is_shuffle`: Bool, determines if the batches returned by `iterate` are shuffled.
+Represents an `Iterable` dataset with the following fields:
+
+# Fields
+- `dataset::Dict{Symbol, Any}`: representation of the dataset as a Dictionary with style as `style`.
+- `epochs::Vector{Int}`: list of epochs to load.
+- `repo::String`: the repository from which the dataset is taken.
+- `length::Int`: the length of the dataset.
+- `batch_size::Int`: the size of the batches returned by `iterate`.
+- `style::Tuple{Symbol}`: the style of the `Iterator` that is returned, check out: [`SARTS`](@ref), [`SART`](@ref) and [`SA`](@ref)
+for types supported out of the box.
+- `rng<:AbstractRNG`.
+- `meta::Dict`: the metadata provided along with the dataset.
+- `is_shuffle::Bool`: determines if the batches returned by `iterate` are shuffled.
 """
 struct AtariDataSet{T<:AbstractRNG} <:RLDataSet
     dataset::Dict{Symbol, Any}
     epochs::Vector{Int}
     repo::String
-    length::Integer
-    batch_size::Integer
+    length::Int
+    batch_size::Int
     style::Tuple
     rng::T
     meta::Dict
@@ -31,22 +33,30 @@ const atari_frame_size = 84
 const epochs_per_game = 50
 
 """
-    dataset(dataset::String, epochs::Vector{Int}; repo::String, style::Tuple, rng<:AbstractRNG, is_shuffle::Bool, max_iters::Int64, batch_size::Int64)
-
-Creates a dataset of enclosed in a AtariDataSet type and other related metadata for the `dataset` that is passed.
-The `AtariDataSet` type is an iterable that fetches batches when used in a for loop for convenience during offline training.
-
-`dataset`: String, name of the datset.
-`index`: Int, analogous to v
-`epochs`: Vector{Int}, list of epochs to load
-`repo`: Name of the repository of the dataset
-`style`: the style of the iterator and the Dict inside AtariDataSet that is returned.
-`rng`: StableRNG
-`max_iters`: maximum number of iterations for the iterator.
-`is_shuffle`: whether the dataset is shuffled or not. `true` by default.
-`batch_size`: batch_size that is yielded by the iterator. Defaults to 256.
-
-The returned type is an infinite iterator which can be called using `iterate` and will return batches as specified in the dataset.
+    dataset(dataset, index, epochs; <keyword arguments>)
+
+Create a dataset enclosed in an [`AtariDataSet`](@ref) `Iterable` type that also contains related metadata
+for the `dataset` that is passed. The returned type is an infinite or a finite `Iterator`
+respectively depending upon whether `is_shuffle` is `true` or `false`. For more information regarding
+the dataset, refer to [google-research/batch_rl](https://github.com/google-research/batch_rl).
+
+# Arguments
+- `dataset::String`: name of the dataset.
+- `index::Int`: analogous to `v` and different values correspond to different `seed`s that 
+are used for data collection. can be between `[1:5]`.
+- `epochs::Vector{Int}`: list of epochs to load. included epochs should be between `[0:50]`.
+- `style::Tuple{Symbol}=SARTS`: the style of the `Iterator` that is returned. can be [`SARTS`](@ref), 
+[`SART`](@ref) or [`SA`](@ref).
+- `repo::String="atari-replay-datasets"`: name of the repository of the dataset.
+- `rng<:AbstractRNG=StableRNG(123)`.
+- `is_shuffle::Bool=true`: determines if the dataset is shuffled or not.
+- `batch_size::Int=256`: batch_size that is yielded by the iterator.
+
+!!! note
+
+    The dataset takes up a significant amount of space in RAM. It is therefore advised to
+    have 20GB of RAM available even to load a single epoch. We are looking for ways to use
+    lazy data loading here, and any contributions are welcome.
 """
 function dataset(
     game::String,
@@ -172,4 +182,4 @@ function atari_verify(dataset::Dict, num_epochs::Int)
     @assert size(dataset["action"]) == (num_epochs * samples_per_epoch,)
     @assert size(dataset["reward"]) == (num_epochs * samples_per_epoch,)
     @assert size(dataset["terminal"]) == (num_epochs * samples_per_epoch,)
-end
+end
diff --git a/src/ReinforcementLearningDatasets/src/common.jl b/src/ReinforcementLearningDatasets/src/common.jl
@@ -1,8 +1,24 @@
 export SARTS
 export SART
+export SA
 export RLDataSet
 
 abstract type RLDataSet end
 
+"""
+(:state, :action, :reward, :terminal, :next_state)
+type of the returned batches.
+"""
 const SARTS = (:state, :action, :reward, :terminal, :next_state)
-const SART = (:state, :action, :reward, :terminal)
+
+"""
+(:state, :action, :reward, :terminal)
+type of the returned batches.
+"""
+const SART = (:state, :action, :reward, :terminal)
+
+"""
+(:state, :action)
+type of the returned batches.
+"""
+const SA = (:state, :action)
diff --git a/src/ReinforcementLearningDatasets/src/d4rl/d4rl_dataset.jl b/src/ReinforcementLearningDatasets/src/d4rl/d4rl_dataset.jl
@@ -9,16 +9,18 @@ export dataset
 export D4RLDataSet
 
 """
-Represents an iterable dataset of type `D4RLDataSet` with the following fields:
-
-`dataset`: Dict{Symbol, Any}, representation of the dataset as a Dictionary with style as `style`
-`repo`: String, the repository from which the dataset is taken
-`dataset_size`: Integer, the size of the dataset
-`batch_size`: Integer, the size of the batches returned by `iterate`.
-`style`: Tuple, the type of the NamedTuple, for now SARTS and SART is supported.
-`rng`<: AbstractRNG.
-`meta`: Dict, the metadata provided along with the dataset
-`is_shuffle`: Bool, determines if the batches returned by `iterate` are shuffled.
+Represents an `Iterable` dataset with the following fields:
+
+# Fields
+- `dataset::Dict{Symbol, Any}`: representation of the dataset as a Dictionary with style as `style`.
+- `repo::String`: the repository from which the dataset is taken.
+- `dataset_size::Int`: the number of samples in the dataset.
+- `batch_size::Int`: the size of the batches returned by `iterate`.
+- `style::Tuple{Symbol}`: the style of the `Iterator` that is returned, check out: [`SARTS`](@ref), [`SART`](@ref) and [`SA`](@ref)
+for types supported out of the box.
+- `rng<:AbstractRNG`.
+- `meta::Dict`: the metadata provided along with the dataset.
+- `is_shuffle::Bool`: determines if the batches returned by `iterate` are shuffled.
 """
 struct D4RLDataSet{T<:AbstractRNG} <: RLDataSet
     dataset::Dict{Symbol, Any}
@@ -35,24 +37,31 @@ end
 # TO-DO: enable the users providing their own paths to datasets if they already have it
 # TO-DO: add additional env arg to do complete verify function
 """
-    dataset(dataset::String; style::Tuple, rng<:AbstractRNG, is_shuffle::Bool, max_iters::Int64, batch_size::Int64)
-
-Creates a dataset of enclosed in a D4RLDataSet type and other related metadata for the `dataset` that is passed.
-The `D4RLDataSet` type is an iterable that fetches batches when used in a for loop for convenience during offline training.
-
-`dataset`: Dict{Symbol, Any}, Name of the datset.
-`repo`: Name of the repository of the dataset.
-`style`: the style of the iterator and the Dict inside D4RLDataSet that is returned.
-`rng`: StableRNG
-`max_iters`: maximum number of iterations for the iterator.
-`is_shuffle`: whether the dataset is shuffled or not. `true` by default.
-`batch_size`: batch_size that is yielded by the iterator. Defaults to 256.
-
-The returned type is an infinite iterator which can be called using `iterate` and will return batches as specified in the dataset.
+    dataset(dataset; <keyword arguments>)
+
+Create a dataset enclosed in a [`D4RLDataSet`](@ref) `Iterable` type that also contains related metadata
+for the `dataset` that is passed. The returned type is an infinite or a finite `Iterator`
+respectively depending upon whether `is_shuffle` is `true` or `false`. For more information regarding
+the dataset, refer to [D4RL](https://github.com/rail-berkeley/d4rl).
+
+# Arguments
+- `dataset::String`: name of the dataset.
+- `repo::String="d4rl"`: name of the repository of the dataset.
+- `style::Tuple{Symbol}=SARTS`: the style of the `Iterator` that is returned. can be [`SARTS`](@ref),
+[`SART`](@ref) or [`SA`](@ref).
+- `rng<:AbstractRNG=StableRNG(123)`.
+- `is_shuffle::Bool=true`: determines if the dataset is shuffled or not.
+- `batch_size::Int=256`: batch_size that is yielded by the iterator.
+
+!!! note
+
+    [`FLOW`](https://flow-project.github.io/) and [`CARLA`](https://github.com/rail-berkeley/d4rl/wiki/CARLA-Setup), which are supported by [D4RL](https://github.com/rail-berkeley/d4rl), have not
+    been tested in this package yet.
 """
-function dataset(dataset::String;
-    style=SARTS,
+function dataset(
+    dataset::String;
     repo = "d4rl",
+    style=SARTS,
     rng = StableRNG(123), 
     is_shuffle = true, 
     batch_size=256
@@ -139,4 +148,4 @@ function d4rl_verify(data::Dict{String, Any})
     N_samples = size(data["observations"])[2]
     @assert size(data["rewards"]) == (N_samples,) || size(data["rewards"]) == (1, N_samples)
     @assert size(data["terminals"]) == (N_samples,) || size(data["terminals"]) == (1, N_samples)
-end
+end
diff --git a/src/ReinforcementLearningDatasets/src/rl_unplugged/atari/rl_unplugged_atari.jl b/src/ReinforcementLearningDatasets/src/rl_unplugged/atari/rl_unplugged_atari.jl
@@ -11,19 +11,7 @@ using PNGFiles
 """
     RLTransition
 
-Represents an RLTransition. It can also be used to represent a batch by adding another dimension.
-
-The constructor decodes the incoming `TFRecord.Example` to be ready to use.
-
-Fields:
-- `state`
-- `action`
-- `reward`
-- `terminal`
-- `next_state`
-- `next_action`
-- `episode_id`
-- `episode_return`
+Represent an RLTransition and can also represent a batch.
 """
 struct RLTransition
     state
@@ -66,21 +54,23 @@ function RLTransition(example::TFRecord.Example)
     RLTransition(s, a, r, t, s′, a′, episode_id, episode_return)
 end
 """
-    rl_unplugged_atari_dataset(game::String, run::Int, shards::Vector{Int}; (optional_args))
+    rl_unplugged_atari_dataset(game, run, shards; <keyword arguments>)
 
-Returns a buffered `Channel` of `RLTransition` batches which supports multi threading.
+Returns a buffered `Channel` of [`RLTransition`](@ref) batches which supports 
+multi threaded loading.
 
-### Arguments and optional_args:
+# Arguments
 
-- `game::String`, The name of the env.
-- `run::Int`, The run number. Can be in the range 1:5.
-- `shards::Vector{Int}` The shards that are to be loaded.
-optional_args:
-- `shuffle_buffer_size=10_000`, This is the size of the shuffle_buffer used in loading RLTransitions.
-- `tf_reader_bufsize=1*1024*1024`, The size of the buffer `bufsize` that is used internally in `TFRecord.read`.
-- `tf_reader_sz=10_000`, The size of the `Channel`, `channel_size` that is returned by `TFRecord.read`.
-- `batch_size=256`, The size of the batches that are returned by the Channel that is finally returned.
-- `n_preallocations`, The size of the buffer in the `Channel` that is returned.
+- `game::String`: name of the dataset.
+- `run::Int`: run number. can be in the range `1:5`.
+- `shards::Vector{Int}`: the shards that are to be loaded.
+- `shuffle_buffer_size::Int=10_000`: size of the shuffle_buffer used in loading RLTransitions.
+- `tf_reader_bufsize::Int=1*1024*1024`: the size of the buffer `bufsize` that is used internally 
+in `TFRecord.read`.
+- `tf_reader_sz::Int=10_000`: the size of the `Channel`, `channel_size` that is returned by 
+`TFRecord.read`.
+- `batch_size::Int=256`: The number of samples within the batches that are returned by the `Channel`.
+- `n_preallocations::Int=nthreads()*12`: the size of the buffer in the `Channel` that is returned.
 
 !!! note
 
@@ -90,11 +80,11 @@ function rl_unplugged_atari_dataset(
     game::String,
     run::Int,
     shards::Vector{Int};
-    shuffle_buffer_size = 10_000,
-    tf_reader_bufsize = 1*1024*1024,
-    tf_reader_sz = 10_000,
-    batch_size = 256,
-    n_preallocations = nthreads() * 12
+    shuffle_buffer_size=10_000,
+    tf_reader_bufsize=1*1024*1024,
+    tf_reader_sz=10_000,
+    batch_size=256,
+    n_preallocations=nthreads()*12
 )
     n = nthreads()