From bc13f9bb4e4d4dc6e27e7f3ace41a1be1c30accf Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:21:04 -0700 Subject: [PATCH 1/8] Updated readme --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d677a8e..ea928a9 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![CI](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml) [![codecov.io](http://codecov.io/github/JuliaPOMDP/MCVI.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/MCVI.jl?branch=master) +[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaPOMDP.github.io/MCVI.jl/stable) The Monte Carlo Value Iteration (MCVI) offline solver for `POMDPs.jl`. @@ -12,12 +13,10 @@ Bai, H., Hsu, D., & Lee, W. S. (2014). Integrated perception and planning in the ## Installation ```julia -using POMDPs -POMDPs.add_registry() -import Pkg +using Pkg Pkg.add("MCVI") ``` -## Documentation +## Example -See [here](http://juliapomdp.github.io/MCVI.jl/) +Reference the docs for an example using `LightDark1D`. 
From d76dea21522eeda318373d008118a86ed6afa6de Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:21:54 -0700 Subject: [PATCH 2/8] added constructors for MCVISimulator --- src/simulate.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/simulate.jl b/src/simulate.jl index f722689..09ea452 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -4,7 +4,14 @@ mutable struct MCVISimulator <: POMDPs.Simulator times::Integer display::Bool end -MCVISimulator() = MCVISimulator(MersenneTwister(420), 1, false) +MCVISimulater(rng::AbstractRNG) = MCVISimulator(rng, 1, false) +function MCVISimulator(; + rng::AbstractRNG=Random.GLOBAL_RNG, + times::Integer=1, + display::Bool=false +) + return MCVISimulator(rng, times, display) +end function simulate(sim::MCVISimulator, pomdp::POMDPs.POMDP, policy::MCVIPolicy, updater::MCVIUpdater, initial_node::MCVINode, init_state=nothing) sum_reward::Reward = 0 From d0daaa2d4faa9a2dc4efdd910465270c16448cef Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:22:24 -0700 Subject: [PATCH 3/8] Modified runtests example to be more explicit for use outside of testing environment --- test/runtests.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 2d88bac..d50adc2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,20 +16,20 @@ mutable struct LightDark1DUpperBound rng::AbstractRNG end -function lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState) - r = @gen(:r)(p, s, init_lower_action(p), lb.rng) +function MCVI.init_lower_action(p::LightDark1D) + return 0 +end + +function MCVI.lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState) + r = @gen(:r)(p, s, MCVI.init_lower_action(p), lb.rng) return r * discount(p) end -function upper_bound(ub::LightDark1DUpperBound, p::LightDark1D, s::LightDark1DState) +function MCVI.upper_bound(ub::LightDark1DUpperBound, 
p::LightDark1D, s::LightDark1DState) steps = abs(s.y)/p.step_size + 1 return p.correct_r*(discount(p)^steps) end -function init_lower_action(p::LightDark1D) - return 0 # Worst? This depends on the initial state? XXX -end - include("test_policy.jl") include("test_updater.jl") From df54ae64c5dda8fc01b1fee383f544a1f30fee8a Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:22:37 -0700 Subject: [PATCH 4/8] updated MCVISolver doc string --- src/solver.jl | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/solver.jl b/src/solver.jl index ed42e82..5763fef 100644 --- a/src/solver.jl +++ b/src/solver.jl @@ -22,23 +22,25 @@ mutable struct ActionNode{O,A} <: TreeNode end """ + MCVISolver <: POMDPs.Solver + +The MCVI solver is a solver for POMDPs that uses Monte Carlo Value Iteration to solve the problem. + Described in Bai, H., Hsu, D., & Lee, W. S. (2014). Integrated perception and planning in the continuous space: A POMDP approach. *The International Journal of Robotics Research*, 33(9), 1288-1302. + +# Fields +- `simulator::POMDPs.Simulator` +- `root::Union{BeliefNode, Nothing}` +- `n_iter::Int64`: Number of iterations +- `num_particles::Int64`: Number of belief particles to be used +- `obs_branch::Int64`: Branching factor (previous default: 8) +- `num_state::Int64`: Number of states to sample from belief (previous default: 500) +- `num_prune_obs::Int64`: Number of times to sample observation while pruning alpha edges (previous default: 1000) +- `num_eval_belief::Int64`: Number of times to simulate while evaluating belief (previous default: 5000) +- `num_obs::Int64`: Number of observations to sample while evaluating belief (previous default: 50) +- `lbound::Any`: An object representing the lower bound. The function `MCVI.lower_bound(lbound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work. 
+- `ubound::Any`: An object representing the upper bound. The function `MCVI.upper_bound(ubound, problem, s)` will be called to get the upper bound for the state `s` - this function needs to be implemented for the solver to work. -Hyperparameters: - -- `n_iter` : Number of iterations -- `num_particles` : Number of belief particles to be used -- `obs_branch` : Branching factor [default 8?] -- `num_state` : Number of states to sample from belief [default 500?] -- `num_prune_obs` : Number of times to sample observation while pruning alpha edges [default 1000?] -- `num_eval_belief` : Number of times to simulate while evaluating belief [default 5000?] -- `num_obs` : [default 50?] - -Bounds: - -- `lbound` : An object representing the lower bound. The function `MCVI.lower_bound(lbound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work. -- `ubound` : An object representing the upper bound. The function `MCVI.upper_bound(ubound, problem, s)` will be called to get the lower bound for the state `s` - this function needs to be implemented for the solver to work. -See `$(joinpath(dirname(pathof(MCVI)),"..", "test","runtests.jl"))` for an example of bounds implemented for the Light Dark problem. +Reference the docs for an example of bounds implemented for the Light Dark problem. 
""" mutable struct MCVISolver <: POMDPs.Solver simulator::POMDPs.Simulator From cc3e26faef045a403eb9e6d668139f7d52e83e8d Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:23:27 -0700 Subject: [PATCH 5/8] Updated docs --- deps/build.jl | 0 docs/Project.toml | 6 +++++ docs/make.jl | 21 ++++++++++++---- docs/src/index.md | 61 +++++++++++++++++++++++++++++++++++++++++++--- docs/src/solver.md | 7 ------ 5 files changed, 79 insertions(+), 16 deletions(-) delete mode 100644 deps/build.jl create mode 100644 docs/Project.toml delete mode 100644 docs/src/solver.md diff --git a/deps/build.jl b/deps/build.jl deleted file mode 100644 index e69de29..0000000 diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..15cf031 --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,6 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +MCVI = "30d33687-f7ed-5b0d-8b26-3dc9b7abd572" +POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca" +POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/docs/make.jl b/docs/make.jl index 5ec9d2f..6b6cfc0 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,9 +1,20 @@ -using Documenter, MCVI +using Documenter +using MCVI +using POMDPs +using POMDPModels +using Random -makedocs(modules=MCVI) +makedocs( + sitename = "MCVI.jl", + authors = "Jayesh K. 
Gupta", + modules = [MCVI], + format = Documenter.HTML(), + # doctest = false, + checkdocs = :none, + +) deploydocs( repo = "github.com/JuliaPOMDP/MCVI.jl.git", - julia = "0.5", - osname = "linux" - ) + versions = ["stable" => "v^", "v#.#"] +) diff --git a/docs/src/index.md b/docs/src/index.md index c4121b4..9aa1a12 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,7 +1,7 @@ # MCVI -[![Build Status](https://travis-ci.org/JuliaPOMDP/MCVI.jl.svg?branch=master)](https://travis-ci.org/JuliaPOMDP/MCVI.jl) -[![Coverage Status](https://coveralls.io/repos/github/JuliaPOMDP/MCVI.jl/badge.svg?branch=master)](https://coveralls.io/github/JuliaPOMDP/MCVI.jl?branch=master) +[![CI](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/JuliaPOMDP/MCVI.jl/actions/workflows/CI.yml) +[![codecov.io](http://codecov.io/github/JuliaPOMDP/MCVI.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/MCVI.jl?branch=master) The Monte Carlo Value Iteration (MCVI) offline solver for `POMDPs.jl`. @@ -12,9 +12,62 @@ Bai, H., Hsu, D., & Lee, W. S. (2014). 
Integrated perception and planning in the ## Installation ```julia -Pkg.clone("https://github.com/JuliaPOMDP/MCVI.jl") +using Pkg +Pkg.add("MCVI") +``` + +## Example + +```jldoctest +using POMDPs +using POMDPModels +using MCVI +using Random + +mutable struct LightDark1DLowerBound + rng::AbstractRNG +end + +mutable struct LightDark1DUpperBound + rng::AbstractRNG +end + +function MCVI.init_lower_action(p::LightDark1D) + return 0 +end + +function MCVI.lower_bound(lb::LightDark1DLowerBound, p::LightDark1D, s::LightDark1DState) + r = @gen(:r)(p, s, MCVI.init_lower_action(p), lb.rng) + return r * discount(p) +end + +function MCVI.upper_bound(ub::LightDark1DUpperBound, p::LightDark1D, s::LightDark1DState) + steps = abs(s.y)/p.step_size + 1 + return p.correct_r*(discount(p)^steps) +end + +prob = LightDark1D() +sim = MCVISimulator(rng=MersenneTwister(1)) + +solver = MCVISolver(sim, nothing, 1, 100, 8, 500, 1000, 5000, 50, LightDark1DLowerBound(sim.rng), LightDark1DUpperBound(sim.rng)) + +println("Solving...") +policy = solve(solver, prob) +println("Solved!") + +up = updater(policy) +reward = simulate(MCVISimulator(rng=MersenneTwister(1)), prob, policy, up, up.root) +println("Reward: ", reward) + +# output +Solving... +Gap closed! +Solved! +Reward: 5.314410000000001 ``` ## Documentation -Behavior is controlled using [solver options](@ref Solver). 
+```@docs +MCVISolver +``` \ No newline at end of file diff --git a/docs/src/solver.md b/docs/src/solver.md deleted file mode 100644 index ef37573..0000000 --- a/docs/src/solver.md +++ /dev/null @@ -1,7 +0,0 @@ -# Solver - -Solver Options are controlled throught the MCVISolver keyword constructor: - -```@docs -MCVISolver() -``` From 74baef5fb6ca02deb6126ad156f7cf4f6f0d440f Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 13:40:33 -0700 Subject: [PATCH 6/8] Added docs workflow --- .github/workflows/Documentation.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 .github/workflows/Documentation.yml diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml new file mode 100644 index 0000000..525a0cd --- /dev/null +++ b/.github/workflows/Documentation.yml @@ -0,0 +1,23 @@ +name: Documentation + +on: + push: + branches: + - master + tags: [v*] + pull_request: + +jobs: + build: + permissions: + contents: write + statuses: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file From f67060ea8299d7b6eb3b911236a3a3093b1c34bb Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 14:26:06 -0700 Subject: [PATCH 7/8] Add tests for constructor and fixed typo --- src/simulate.jl | 2 +- test/runtests.jl | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/simulate.jl b/src/simulate.jl index 09ea452..8176253 100644 --- a/src/simulate.jl +++ b/src/simulate.jl @@ -4,7 +4,7 @@ mutable struct MCVISimulator <: POMDPs.Simulator times::Integer display::Bool end -MCVISimulater(rng::AbstractRNG) = MCVISimulator(rng, 1, false) +MCVISimulator(rng::AbstractRNG) = MCVISimulator(rng, 1, false) function 
MCVISimulator(; rng::AbstractRNG=Random.GLOBAL_RNG, times::Integer=1, diff --git a/test/runtests.jl b/test/runtests.jl index d50adc2..a9326a3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -47,6 +47,21 @@ include("test_solve.jl") @test test_solve() include("test_simulation.jl") +@testset "MCVISimulator" begin + @test test_simulation() + mcvi_sim = MCVISimulator() + @test mcvi_sim.times == 1 + @test mcvi_sim.display == false + @test mcvi_sim.rng == Random.GLOBAL_RNG + mcvi_sim = MCVISimulator(MersenneTwister(42)) + @test mcvi_sim.times == 1 + @test mcvi_sim.display == false + @test mcvi_sim.rng == MersenneTwister(42) + mcvi_sim = MCVISimulator(rng=MersenneTwister(42), times=10, display=true) + @test mcvi_sim.times == 10 + @test mcvi_sim.display == true + @test mcvi_sim.rng == MersenneTwister(42) +end test_simulation() include("test_requirements.jl") From 981253e04eaab4fd823e67c90a6934d00d510ade Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Mon, 18 Dec 2023 14:26:38 -0700 Subject: [PATCH 8/8] Added back the rng used for the test_backup function before MCVISimulator change --- test/test_updater.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_updater.jl b/test/test_updater.jl index 45c68f1..21d31d3 100644 --- a/test/test_updater.jl +++ b/test/test_updater.jl @@ -6,7 +6,7 @@ function test_backup() # s0 = initialstate(p) # sim = MCVISimulator(MersenneTwister(420), s0, 1) scratch = MCVI.Scratch(Vector{Float64}(undef, 50), zeros(50), zeros(50), zeros(50, 2)) - sim = MCVISimulator() + sim = MCVISimulator(rng=MersenneTwister(420)) n, _ = MCVI.backup(b0, policy, sim, p, 500, 1000, 500, scratch) MCVI.addnode!(policy.updater, n)