Merge pull request #23 from JuliaPOMDP/docs_update

Docs update with a small change to MCVISimulator constructor with tests added.
JuliaPOMDP · Dec 18, 2023 · 52d9f8e · 52d9f8e · dylan-asmar · Dec 18, 2023
2 parents a54cc5a + 981253e
commit 52d9f8e
Show file tree

Hide file tree

Showing 11 changed files with 155 additions and 46 deletions.
diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
@@ -0,0 +1,23 @@
+name: Documentation
+
+on:
+  push:
+    branches:
+      - master
+    tags: [v*]
+  pull_request:
+
+jobs:
+  build:
+    permissions:
+      contents: write
+      statuses: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v1
+      - uses: julia-actions/julia-buildpkg@v1
+      - uses: julia-actions/julia-docdeploy@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} 
diff --git a/README.md b/README.md
@@ -2,6 +2,7 @@
 
 [![CI](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml)
 [![codecov.io](http://codecov.io/github/JuliaPOMDP/MCVI.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/MCVI.jl?branch=master)
+[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaPOMDP.github.io/MCVI.jl/stable)
 
 The Monte Carlo Value Iteration (MCVI) offline solver for `POMDPs.jl`.
 
@@ -12,12 +13,10 @@ Bai, H., Hsu, D., & Lee, W. S. (2014). Integrated perception and planning in the
 ## Installation
 
 ```julia
-using POMDPs
-POMDPs.add_registry()
-import Pkg
+using Pkg
 Pkg.add("MCVI")
 ```
 
-## Documentation
+## Example
 
-See [here](http://juliapomdp.github.io/MCVI.jl/)
+See the [documentation](https://JuliaPOMDP.github.io/MCVI.jl/stable) for an example using `LightDark1D`.
diff --git a/deps/build.jl b/deps/build.jl
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -0,0 +1,6 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+MCVI = "30d33687-f7ed-5b0d-8b26-3dc9b7abd572"
+POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca"
+POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,9 +1,20 @@
-using Documenter, MCVI
+using Documenter
+using MCVI
+using POMDPs
+using POMDPModels
+using Random
 
-makedocs(modules=MCVI)
+makedocs(
+    sitename = "MCVI.jl",
+    authors = "Jayesh K. Gupta",
+    modules = [MCVI],
+    format = Documenter.HTML(),
+    # doctest = false,
+    checkdocs = :none,
+
+)
 
 deploydocs(
            repo   = "github.com/JuliaPOMDP/MCVI.jl.git",
-           julia  = "0.5",
-           osname = "linux"
-           )
+           versions = ["stable" => "v^", "v#.#"]
+)
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,7 +1,7 @@
 # MCVI
 
-[![Build Status](https://travis-ci.org/JuliaPOMDP/MCVI.jl.svg?branch=master)](https://travis-ci.org/JuliaPOMDP/MCVI.jl)
-[![Coverage Status](https://coveralls.io/repos/github/JuliaPOMDP/MCVI.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaPOMDP/MCVI.jl?branch=master)
+[![CI](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml)
+[![codecov.io](http://codecov.io/github/JuliaPOMDP/MCVI.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/MCVI.jl?branch=master)
 
 The Monte Carlo Value Iteration (MCVI) offline solver for `POMDPs.jl`.
 
@@ -12,9 +12,62 @@ Bai, H., Hsu, D., & Lee, W. S. (2014). Integrated perception and planning in the
 ## Installation
 
 ```julia
-Pkg.clone("https://github.com/JuliaPOMDP/MCVI.jl")
+using Pkg
+Pkg.add("MCVI")
+```
+
+## Example
+
+```jldoctest
+using POMDPs
+using POMDPModels
+using MCVI
+using Random
+
+mutable struct LightDark1DLowerBound
+    rng::AbstractRNG
+end
+
+mutable struct LightDark1DUpperBound
+    rng::AbstractRNG
+end
+
+function MCVI.init_lower_action(p::LightDark1D)
+    return 0
+end
+
+function MCVI.lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState)
+    r = @gen(:r)(p, s, MCVI.init_lower_action(p), lb.rng)
+    return r * discount(p)
+end
+
+function MCVI.upper_bound(ub::LightDark1DUpperBound, p::LightDark1D, s::LightDark1DState)
+    steps = abs(s.y)/p.step_size + 1
+    return p.correct_r*(discount(p)^steps)
+end
+
+prob = LightDark1D()
+sim = MCVISimulator(rng=MersenneTwister(1))
+
+solver = MCVISolver(sim, nothing, 1, 100, 8, 500, 1000, 5000, 50, LightDark1DLowerBound(sim.rng), LightDark1DUpperBound(sim.rng))
+
+println("Solving...")
+policy = solve(solver, prob)
+println("Solved!")
+
+up = updater(policy)
+reward = simulate(MCVISimulator(rng=MersenneTwister(1)), prob, policy, up, up.root)
+println("Reward: ", reward)
+
+# output
+Solving...
+Gap closed!
+Solved!
+Reward: 5.314410000000001
 ```
 
 ## Documentation
 
-Behavior is controlled using [solver options](@ref Solver).
+```@docs
+MCVISolver
+```
diff --git a/docs/src/solver.md b/docs/src/solver.md
diff --git a/src/simulate.jl b/src/simulate.jl
@@ -4,7 +4,14 @@ mutable struct MCVISimulator <: POMDPs.Simulator
     times::Integer
     display::Bool
 end
-MCVISimulator() = MCVISimulator(MersenneTwister(420), 1, false)
+MCVISimulator(rng::AbstractRNG) = MCVISimulator(rng, 1, false)
+function MCVISimulator(; 
+    rng::AbstractRNG=Random.GLOBAL_RNG,
+    times::Integer=1,
+    display::Bool=false
+)
+    return MCVISimulator(rng, times, display)
+end
 
 function simulate(sim::MCVISimulator, pomdp::POMDPs.POMDP, policy::MCVIPolicy, updater::MCVIUpdater, initial_node::MCVINode, init_state=nothing)
     sum_reward::Reward = 0

diff --git a/src/solver.jl b/src/solver.jl
@@ -22,23 +22,25 @@ mutable struct ActionNode{O,A} <: TreeNode
 end
 
 """
+    MCVISolver <: POMDPs.Solver
+        
+The MCVI solver is a solver for POMDPs that uses Monte Carlo Value Iteration to solve the problem. 
+    Described in Bai, H., Hsu, D., & Lee, W. S. (2014). Integrated perception and planning in the continuous space: A POMDP approach. *The International Journal of Robotics Research*, 33(9), 1288-1302.
+    
+# Fields
+- `simulator::POMDPs.Simulator`
+- `root::Union{BeliefNode, Nothing}`
+- `n_iter::Int64`: Number of iterations
+- `num_particles::Int64`: Number of belief particles to be used
+- `obs_branch::Int64`: Branching factor (previous default: 8)
+- `num_state::Int64`: Number of states to sample from belief (previous default: 500)
+- `num_prune_obs::Int64`: Number of times to sample observation while pruning alpha edges (previous default: 1000)
+- `num_eval_belief::Int64`: Number of times to simulate while evaluating belief (previous default: 5000)
+- `num_obs::Int64`: Number of observations to sample while evaluating belief (previous default: 50)
+- `lbound::Any`: An object representing the lower bound. The function `MCVI.lower_bound(lbound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work.
+- `ubound::Any`: An object representing the upper bound. The function `MCVI.upper_bound(ubound, problem, s)` will be called to get the upper bound for the state `s` - this function needs to be implemented for the solver to work.
 
-Hyperparameters:
-
-- `n_iter`          : Number of iterations
-- `num_particles`   : Number of belief particles to be used
-- `obs_branch`      : Branching factor [default 8?]
-- `num_state`       : Number of states to sample from belief [default 500?]
-- `num_prune_obs`   : Number of times to sample observation while pruning alpha edges [default 1000?]
-- `num_eval_belief` : Number of times to simulate while evaluating belief [default 5000?]
-- `num_obs`         : [default 50?]
-
-Bounds:
-
-- `lbound`          : An object representing the lower bound. The function `MCVI.lower_bound(lbound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work.
-- `ubound`          : An object representing the upper bound. The function `MCVI.upper_bound(ubound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work.
-
-See `$(joinpath(dirname(pathof(MCVI)),"..", "test","runtests.jl"))` for an example of bounds implemented for the Light Dark problem.
+See the package documentation for an example of bounds implemented for the Light Dark problem.
 """
 mutable struct MCVISolver <: POMDPs.Solver
     simulator::POMDPs.Simulator

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -16,20 +16,20 @@ mutable struct LightDark1DUpperBound
     rng::AbstractRNG
 end
 
-function lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState)
-    r = @gen(:r)(p, s, init_lower_action(p), lb.rng)
+function MCVI.init_lower_action(p::LightDark1D)
+    return 0
+end
+
+function MCVI.lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState)
+    r = @gen(:r)(p, s, MCVI.init_lower_action(p), lb.rng)
     return r * discount(p)
 end
 
-function upper_bound(ub::LightDark1DUpperBound, p::LightDark1D, s::LightDark1DState)
+function MCVI.upper_bound(ub::LightDark1DUpperBound, p::LightDark1D, s::LightDark1DState)
     steps = abs(s.y)/p.step_size + 1
     return p.correct_r*(discount(p)^steps)
 end
 
-function init_lower_action(p::LightDark1D)
-    return 0 # Worst? This depends on the initial state? XXX
-end
-
 include("test_policy.jl")
 
 include("test_updater.jl")
@@ -47,6 +47,21 @@ include("test_solve.jl")
 @test test_solve()
 
 include("test_simulation.jl")
+@testset "MCVISimulator" begin
+   @test test_simulation()
+   mcvi_sim = MCVISimulator()
+   @test mcvi_sim.times == 1
+   @test mcvi_sim.display == false
+   @test mcvi_sim.rng == Random.GLOBAL_RNG
+   mcvi_sim = MCVISimulator(MersenneTwister(42))
+   @test mcvi_sim.times == 1
+   @test mcvi_sim.display == false
+   @test mcvi_sim.rng == MersenneTwister(42)
+   mcvi_sim = MCVISimulator(rng=MersenneTwister(42), times=10, display=true)
+   @test mcvi_sim.times == 10
+   @test mcvi_sim.display == true
+   @test mcvi_sim.rng == MersenneTwister(42)    
+end
 test_simulation()
 
 include("test_requirements.jl")

diff --git a/test/test_updater.jl b/test/test_updater.jl
@@ -6,7 +6,7 @@ function test_backup()
     # s0 = initialstate(p)
     # sim = MCVISimulator(MersenneTwister(420), s0, 1)
     scratch = MCVI.Scratch(Vector{Float64}(undef, 50), zeros(50), zeros(50), zeros(50, 2))
-    sim = MCVISimulator()
+    sim = MCVISimulator(rng=MersenneTwister(420))
 
     n, _ = MCVI.backup(b0, policy, sim, p, 500, 1000, 500, scratch)
     MCVI.addnode!(policy.updater, n)