From f1541caf94a303c7a57ce36ada890293ee4df1c9 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 31 May 2019 10:03:47 -0700 Subject: [PATCH 1/9] implemented discrete explicit models --- Project.toml | 18 +++++ REQUIRE | 1 - src/QuickPOMDPs.jl | 11 ++- src/discrete_explicit.jl | 137 ++++++++++++++++++++++++++++++++++++++ test/discrete_explicit.jl | 86 ++++++++++++++++++++++++ test/runtests.jl | 7 +- 6 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 Project.toml delete mode 100644 REQUIRE create mode 100644 src/discrete_explicit.jl create mode 100644 test/discrete_explicit.jl diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..417ad95 --- /dev/null +++ b/Project.toml @@ -0,0 +1,18 @@ +name = "QuickPOMDPs" +uuid = "8af83fb2-a731-493c-9049-9e19dbce6165" +authors = ["Zachary Sunberg "] +version = "0.1.0" + +[deps] +BeliefUpdaters = "8bb6e9a1-7d73-552c-a44a-e5dc5634aac4" +POMDPModelTools = "08074719-1b2a-587c-a292-00f91cc44415" +POMDPTesting = "92e6a534-49c2-5324-9027-86e3c861ab81" +POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d" + +[extras] +POMDPPolicies = "182e52fb-cfd0-5e46-8c26-fd0667c990f4" +POMDPSimulators = "e0d0a172-29c6-5d4e-96d0-f262df5d01fd" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test", "POMDPPolicies", "POMDPSimulators"] diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 137767a..0000000 --- a/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -julia 0.6 diff --git a/src/QuickPOMDPs.jl b/src/QuickPOMDPs.jl index a14ecf7..ed17fb7 100644 --- a/src/QuickPOMDPs.jl +++ b/src/QuickPOMDPs.jl @@ -1,5 +1,14 @@ module QuickPOMDPs -# package code goes here +using POMDPs +using POMDPModelTools +using BeliefUpdaters +using POMDPTesting + +export + DiscreteExplicitPOMDP, + DiscreteExplicitMDP + +include("discrete_explicit.jl") end # module diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl new file mode 100644 index 0000000..af670c1 --- /dev/null +++ b/src/discrete_explicit.jl @@ -0,0 +1,137 @@ +struct DiscreteExplicitPOMDP{S,A,O,OF,RF} <: POMDP{S,A,O} + s::Vector{S} + a::Vector{A} + o::Vector{O} + tds::Dict{Tuple{S,A}, SparseCat{Vector{S}, Vector{Float64}}} + ods::Dict{Tuple{A,S}, SparseCat{Vector{O}, Vector{Float64}}} + ofun::OF + r::RF + smap::Dict{S,Int} + amap::Dict{A,Int} + omap::Dict{O,Int} + discount::Float64 +end + +struct DiscreteExplicitMDP{S,A,RF} <: MDP{S,A} + s::Vector{S} + a::Vector{A} + tds::Dict{Tuple{S,A}, SparseCat{Vector{S}, Vector{Float64}}} + r::RF + smap::Dict{S,Int} + amap::Dict{A,Int} + discount::Float64 +end + +const DEP = DiscreteExplicitPOMDP +const DE = Union{DiscreteExplicitPOMDP,DiscreteExplicitMDP} + +POMDPs.discount(m::DE) = m.discount +POMDPs.states(m::DE) = m.s +POMDPs.actions(m::DE) = m.a +POMDPs.n_states(m::DE) = length(m.s) +POMDPs.n_actions(m::DE) = length(m.a) +POMDPs.stateindex(m::DE, s) = m.smap[s] +POMDPs.actionindex(m::DE, a) = m.amap[a] + +POMDPs.observations(m::DEP) = m.o +POMDPs.n_observations(m::DEP) = length(m.o) +POMDPs.obsindex(m::DEP, o) = m.omap[o] +POMDPModelTools.obs_weight(m::DEP, a, sp, o) = m.ofun(a, sp, o) + +POMDPs.transition(m::DE, s, a) = m.tds[s,a] +POMDPs.observation(m::DEP, a, sp) = m.ods[a,sp] +POMDPs.reward(m::DE, s, a) = m.r(s, a) + +POMDPs.initialstate_distribution(m::DEP) = uniform_belief(m) +# XXX hack +POMDPs.initialstate_distribution(m::DiscreteExplicitMDP) = uniform_belief(FullyObservablePOMDP(m)) + +POMDPModelTools.ordered_states(m::DE) = m.s +POMDPModelTools.ordered_actions(m::DE) = m.a 
+POMDPModelTools.ordered_observations(m::DEP) = m.o + +# TODO reward(m, s, a) +# TODO support O(s, a, sp, o) +# TODO initial state distribution + +function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) + ss = vec(collect(s)) + as = vec(collect(a)) + os = vec(collect(o)) + ST = eltype(ss) + AT = eltype(as) + OT = eltype(os) + + tds = filltds(t, ss, as) + + # TODO parallelize? + ods = Dict{Tuple{AT, ST}, SparseCat{Vector{OT}, Vector{Float64}}}() + for u in as + for xp in ss + ys = OT[] + ps = Float64[] + for y in os + p = z(u, xp, y) + if p > 0.0 + push!(ys, y) + push!(ps, p) + end + end + ods[u,xp] = SparseCat(ys, ps) + end + end + + m = DiscreteExplicitPOMDP( + ss, as, os, + tds, ods, + o, r, + Dict(ss[i]=>i for i in 1:length(ss)), + Dict(as[i]=>i for i in 1:length(as)), + Dict(os[i]=>i for i in 1:length(os)), + discount + ) + + probability_check(m) + + return m +end + +function DiscreteExplicitMDP(s, a, t, r, discount) + ss = vec(collect(s)) + as = vec(collect(a)) + + tds = filltds(t, ss, as) + + m = DiscreteExplicitMDP( + ss, as, tds, r, + Dict(ss[i]=>i for i in 1:length(ss)), + Dict(as[i]=>i for i in 1:length(as)), + discount + ) + + trans_prob_consistency_check(m) + + return m +end + +function filltds(t, ss, as) + ST = eltype(ss) + AT = eltype(as) + tds = Dict{Tuple{ST, AT}, SparseCat{Vector{ST}, Vector{Float64}}}() + # TODO parallelize? + for x in ss + for u in as + xps = ST[] + ps = Float64[] + for xp in ss + p = t(x, u, xp) + if p > 0.0 + push!(xps, xp) + push!(ps, p) + end + end + tds[x,u] = SparseCat(xps, ps) + end + end + return tds +end diff --git a/test/discrete_explicit.jl b/test/discrete_explicit.jl new file mode 100644 index 0000000..d52d7ae --- /dev/null +++ b/test/discrete_explicit.jl @@ -0,0 +1,86 @@ +@testset "Discrete Explicit Tiger" begin + S = [:left, :right] + A = [:left, :right, :listen] + O = [:left, :right] + γ = 0.95 + + function T(s, a, sp) + if a == :listen + return s == sp + else # a door is opened + return 0.5 #reset + end + end + + function Z(a, sp, o) + if a == :listen + if o == sp + return 0.85 + else + return 0.15 + end + else + return 0.5 + end + end + + function R(s, a) + if a == :listen + return -1.0 + elseif s == a # the tiger was found + return -100.0 + else # the tiger was escaped + return 10.0 + end + end + + m = DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) + + solver = FunctionSolver(x->:listen) + policy = solve(solver, m) + updater = DiscreteUpdater(m) + + rsum = 0.0 + for (s,b,a,o,r) in stepthrough(m, policy, updater, "s,b,a,o,r", max_steps=10) + println("s: $s, b: $([pdf(b,s) for s in S]), a: $a, o: $o") + rsum += r + end + println("Undiscounted reward was $rsum.") + @test rsum == -10.0 +end + +@testset "Discrete Explicit MDP" begin + S = 1:5 + A = [-1, 1] + γ = 0.95 + + function T(s, a, sp) + if sp == clamp(s+a,1,5) + return 0.8 + elseif sp == clamp(s-a,1,5) + return 0.2 + else + return 0.0 + end + end + + function R(s, a) + if s == 5 + return 1.0 + else + return -1.0 + end + end + + m = DiscreteExplicitMDP(S,A,T,R,γ) + + solver = FunctionSolver(x->1) + policy = solve(solver, m) + + rsum = 0.0 + for (s,a,r) in stepthrough(m, policy, "s,a,r", max_steps=10) + println("s: $s, a: $a") + rsum += r + end + println("Undiscounted reward was $rsum.") +end diff --git a/test/runtests.jl b/test/runtests.jl index 4c363d8..c705717 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using QuickPOMDPs -using Base.Test +using Test -# write your own tests here -@test 1 == 2 +using POMDPs, POMDPPolicies, POMDPSimulators, BeliefUpdaters + 
+include("discrete_explicit.jl") From 21f88af4c0d3f8ed3cfe320b6e187f34d3f696a8 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 31 May 2019 10:27:10 -0700 Subject: [PATCH 2/9] added docstrings --- src/discrete_explicit.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl index af670c1..b21a59b 100644 --- a/src/discrete_explicit.jl +++ b/src/discrete_explicit.jl @@ -53,7 +53,25 @@ POMDPModelTools.ordered_observations(m::DEP) = m.o # TODO reward(m, s, a) # TODO support O(s, a, sp, o) # TODO initial state distribution +# TODO convert_s, etc, dimensions +""" + DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) + +Create a POMDP defined by the tuple (S,A,O,T,Z,R,γ). + +# Arguments + +- `S`,`A`,`O`: State, action, and observation spaces (typically `Vector`s) +- `T::Function`: Transition probability distribution function; ``T(s,a,s')`` is the probability of transitioning to state ``s'`` from state ``s`` after taking action ``a``. +- `Z::Function`: Observation probability distribution function; ``O(a, s', o)`` is the probability of receiving observation ``o`` when state ``s'`` is reached after action ``a``. +- `R::Function`: Reward function; ``R(s,a)`` is the reward for taking action ``a`` in state ``s``. +- `γ::Float64`: Discount factor. + +# Notes +- The default initial state distribution is uniform across all states. Changing this is not yet supported, but it can be overridden for simulations. +- Terminal states are not yet supported, but absorbing states with zero reward can be used. +""" function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) ss = vec(collect(s)) as = vec(collect(a)) @@ -96,6 +114,22 @@ function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) return m end +""" + DiscreteExplicitMDP(S,A,T,R,γ) + +Create an MDP defined by the tuple (S,A,T,R,γ). + +# Arguments + +- `S`,`A`: State and action spaces (typically `Vector`s) +- `T::Function`: Transition probability distribution function; ``T(s,a,s')`` is the probability of transitioning to state ``s'`` from state ``s`` after taking action ``a``. +- `R::Function`: Reward function; ``R(s,a)`` is the reward for taking action ``a`` in state ``s``. +- `γ::Float64`: Discount factor. + +# Notes +- The default initial state distribution is uniform across all states. Changing this is not yet supported, but it can be overridden for simulations. +- Terminal states are not yet supported, but absorbing states with zero reward can be used. +""" function DiscreteExplicitMDP(s, a, t, r, discount) ss = vec(collect(s)) as = vec(collect(a)) From eb3bc7acd36ce6af1997e52ec756b603d56a14e2 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:15:43 -0700 Subject: [PATCH 3/9] updated README --- IDEAS.md | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 128 ++++++++++++++---------------------------------------- 2 files changed, 155 insertions(+), 96 deletions(-) create mode 100644 IDEAS.md diff --git a/IDEAS.md b/IDEAS.md new file mode 100644 index 0000000..4eb42bf --- /dev/null +++ b/IDEAS.md @@ -0,0 +1,123 @@ +# QuickPOMDPs + +Eventually this will be a repository containing more simplified interfaces for expressing certain classes of POMDPs. 
The goal is for [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) to act as a low-level interface (like [MathProgBase](https://github.com/JuliaOpt/MathProgBase.jl)) and for the interface(s) defined here to act as concise and convenient high-level interfaces (like [JuMP](https://github.com/JuliaOpt/JuMP.jl) or [Convex](https://github.com/JuliaOpt/Convex.jl)).
+
+Another package that should be referenced when designing this is [PLite.jl](https://github.com/sisl/PLite.jl/blob/master/docs/README.md).
+
+Contributions of new interfaces for defining specific classes of problems are welcome!
+
+For now, there are just a few sketches of interfaces outlined below:
+
+# Interface Ideas
+
+## Basic Discrete
+
+This interface could represent any problem with discrete actions, observations, and states using the POMDPs.jl explicit interface. It would just be a tight wrapper over the POMDPs.jl interface and would look very similar to a pure POMDPs.jl implementation. Advantages over direct POMDPs.jl are that it's slightly more compact and **you don't have to understand object-oriented programming**.
+
+The Tiger problem would look like this:
+
+```julia
+pomdp = @discretePOMDP begin
+    @states [:tiger_l, :tiger_r]
+    @actions [:open_l, :open_r, :listen]
+    @observations [:tiger_l, :tiger_r]
+
+    @transition function (s, a)
+        if a == :listen
+            return [s]=>[1.0]
+        else
+            return [:tiger_l, :tiger_r]=>[0.5, 0.5] # reset
+        end
+    end
+
+    @reward Dict((:tiger_l, :open_l) => -100.,
+                 (:tiger_r, :open_r) => -100.,
+                 (:tiger_l, :open_r) => 10.,
+                 (:tiger_r, :open_l) => 10.
+                )
+
+    @default_reward -1.0
+
+    @observation function (a, sp)
+        if a == :listen
+            if sp == :tiger_l
+                return [:tiger_l, :tiger_r]=>[0.85, 0.15]
+            else
+                return [:tiger_r, :tiger_l]=>[0.85, 0.15]
+            end
+        else
+            return [:tiger_l, :tiger_r]=>[0.5, 0.5]
+        end
+    end
+
+    @initial [:tiger_l, :tiger_r]=>[0.5, 0.5]
+    @discount 0.95
+end
+```
+
+Note that this could also be done without any macros, as a constructor with keyword arguments. Perhaps that would be easier to understand?
+
+## Generative Function
+
+Another common problem is one where the dynamics are given by a function. The crying baby problem would look something like this:
+
+```julia
+pomdp = @generativePOMDP begin
+    @initial rng -> rand(rng) > 0.5
+
+    @dynamics function (s, a, rng)
+        if s # hungry
+            sp = true
+        else # not hungry
+            sp = rand(rng) < 0.1
+        end
+        if sp # hungry
+            o = rand(rng) < 0.8
+        else # not hungry
+            o = rand(rng) < 0.1
+        end
+        r = (s ? -10.0 : 0.0) + (a ? -5.0 : 0.0)
+        return sp, o, r
+    end
+
+    @discount 0.95
+end
+```
+
+Again, you could do this without macros, and just use keyword arguments.
+
+## Named Variables
+
+It might also be clearer what is going on if we declared variables with names, as shown in the example below.
+
+This would be tougher to compile, though, and it's not clear what the easiest way to express distributions or reward would be.
+
+Ideas welcome!
+
+```julia
+mdp = @MDP begin
+    xmax = 10
+    ymax = 10
+
+    @states begin
+        x in 1:xmax
+        y in 1:ymax
+    end
+
+    @actions begin
+        dir in [:up, :down, :left, :right]
+    end
+
+    @reward rdict = Dict(
+        #XXX no idea how to define this in terms of x and y
+    )
+    default_reward = 0.0
+
+    @transition #XXX what is the most concise way to define the transition distribution??
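+    # One hypothetical possibility (nothing here is implemented): let @transition take
+    # an anonymous function of the named state and action variables that returns a
+    # states=>probabilities pair, like the @discretePOMDP sketch above, e.g.
+    #
+    #     @transition function (x, y, dir)
+    #         dx, dy = Dict(:up=>(0,1), :down=>(0,-1), :left=>(-1,0), :right=>(1,0))[dir]
+    #         return [(clamp(x+dx, 1, xmax), clamp(y+dy, 1, ymax))]=>[1.0]
+    #     end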
+ + terminal = vals(reward) + discount = 0.95 + + initial +end +``` diff --git a/README.md b/README.md index 1687ef3..b155b13 100644 --- a/README.md +++ b/README.md @@ -4,124 +4,60 @@ [![Coverage Status](https://coveralls.io/repos/zsunberg/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/zsunberg/QuickPOMDPs.jl?branch=master) [![codecov.io](http://codecov.io/github/zsunberg/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/zsunberg/QuickPOMDPs.jl?branch=master) -Eventually this will be a repository containing one or more simplified interfaces for expressing certain classes of POMDPs. The goal is for [POMDPs.jl]( https://github.com/JuliaPOMDP/POMDPs.jl) to act as a low level interface (like [MathProgBase](https://github.com/JuliaOpt/MathProgBase.jl)) and for the interface(s) defined here to act as concise and convenient high-level interface (like [JuMP](https://github.com/JuliaOpt/JuMP.jl) or [Convex](https://github.com/JuliaOpt/Convex.jl)). +Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. -Another package that should be referenced when designing this is [PLite.jl](https://github.com/sisl/PLite.jl/blob/master/docs/README.md). +For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md]()). -Contributions of new interfaces for defining specific classes of problems are welcome! +## Discrete Explicit Interface -For now, there are just a few sketches of interfaces outlined below: +This interface is designed to match the standard definition of a POMDP in the literature as closely as possible. The standard definition uses the tuple (S,A,O,T,Z,R,γ) for a POMDP and (S,A,T,R,γ) for an MDP, where -# Interface Ideas +- S, A, and O are the state, action, and observation spaces, +- T and Z are the transition and observation probability distribution functions (pdfs), +- R is the reward function, and +- γ is the discount factor. -## Basic Discrete +The `DiscreteExplicitPOMDP` and `DiscreteExplicitMDP` types are provided for POMDPs and MDPs with discrete spaces and explicitly defined distributions. They should offer moderately good performance on small to medium-sized problems. -Can represent any problem with discrete actions, observations, and states using the POMDPs.jl explicit interface. This would just be a tight wrapper over the POMDPs.jl interface and would look very similar to a pure POMDPs.jl implementation. Advantages over direct POMDPs.jl are that it's slightly more compact and **you don't have to understand object-oriented programming**. +### Example -The Tiger problem would look like this: +The classic tiger POMDP [Kaelbling et al. 98](http://www.sciencedirect.com/science/article/pii/S000437029800023X) can be defined as follows: ```julia -pomdp = @discretePOMDP begin - @states [:tiger_l, :tiger_r] - @actions [:open_l, :open_r, :listen] - @observations [:tiger_l, :tiger_r] + S = [:left, :right] # S, A, and O may contain any objects + A = [:left, :right, :listen] # including user-defined types + O = [:left, :right] + γ = 0.95 - @transition function (s, a) + function T(s, a, sp) if a == :listen - return [s]=>[1.0] - else - return [TIGER_L, TIGER_R]=>[0.5, 0.5] # reset + return s == sp + else # a door is opened + return 0.5 #reset end end - @reward Dict((:tiger_l, :open_l) => -100., - (:tiger_r, :open_r) => -100., - (:tiger_l, :open_r) => 10., - (:tiger_r, :open_l) => 10. 
- ) - - @default_reward -1.0 - - @observation function (a, sp) + function Z(a, sp, o) if a == :listen - if sp == :tiger_l - return [:tiger_l, :tiger_r]=>[0.85, 0.15] + if o == sp + return 0.85 else - return [:tiger_r, :tiger_l]=>[0.85, 0.15] + return 0.15 end else - return [:tiger_l, :tiger_r]=>[0.5, 0.5] + return 0.5 end end - @initial [:tiger_l, :tiger_r]=>[0.5, 0.5] - @discount 0.95 -end -``` - -Note, this could also be done without any macros as a constructor with keyword arguments. Perhaps that would be easier to understand? - -## Generative Function - -Another common problem is one where the dynamics are given by a function. The crying baby problem would look something like this: - -```julia -pomdp = @generativePOMDP begin - @initial rng -> rand(rng) > 0.5 - - @dynamics function (s, a, rng) - if s # hungry - sp = true - else # not hungry - sp = rand(rng) < 0.1 ? true : false - end - if sp # hungry - o = rand(rng) < 0.8 ? true : false - else # not hungry - o = rand(rng) < 0.1 ? true : false + function R(s, a) + if a == :listen + return -1.0 + elseif s == a # the tiger was found + return -100.0 + else # the tiger was escaped + return 10.0 end - r = (s ? -10.0 : 0.0) + (a ? -5.0 : 0.0) - return s, o, r - end - - @discount 0.95 -end -``` - -Again, you could do this without macros, and just use keyword arguments. - -## Named Variables - -It might also be more clear what is going on if we declared variables with names as shown in the example below. - -This would be tougher to compile though, and it's not clear what the easiest way to express distributions or reward would be. - -Ideas welcome! - -```julia -mdp = @MDP begin - xmax = 10 - ymax = 10 - - @states begin - x in 1:10 - y in 1:10 - end - - @actions begin - dir in [:up, :down, :left, :right] end - @reward rdict = Dict( - #XXX no idea how to define this in terms of x and y - ) - default_reward = 0.0 - - @transition #XXX what is the most concise way to define the transition distribution?? - - terminal = vals(reward) - discount = 0.95 - - initial -end + m = DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) ``` From ad557a9ee15f59f3bd07f15e229fa25352c28abd Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:28:31 -0700 Subject: [PATCH 4/9] updated badge urls to JuliaPOMDP --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b155b13..59368a3 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # QuickPOMDPs -[![Build Status](https://travis-ci.org/zsunberg/QuickPOMDPs.jl.svg?branch=master)](https://travis-ci.org/zsunberg/QuickPOMDPs.jl) -[![Coverage Status](https://coveralls.io/repos/zsunberg/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/zsunberg/QuickPOMDPs.jl?branch=master) -[![codecov.io](http://codecov.io/github/zsunberg/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/zsunberg/QuickPOMDPs.jl?branch=master) +[![Build Status](https://travis-ci.org/JuliaPOMDP/QuickPOMDPs.jl.svg?branch=master)](https://travis-ci.org/JuliaPOMDP/QuickPOMDPs.jl) +[![Coverage Status](https://coveralls.io/repos/JuliaPOMDP/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaPOMDP/QuickPOMDPs.jl?branch=master) +[![codecov.io](http://codecov.io/github/JuliaPOMDP/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/QuickPOMDPs.jl?branch=master) Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. 
From 81e86ed89502023529d4d5509e64fc90550c07be Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:38:46 -0700 Subject: [PATCH 5/9] update travis --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 581f4ed..29c342f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,6 @@ language: julia os: - linux - - osx -julia: - - 0.6 - - nightly notifications: email: false git: From 9a14aaa370287a6f6accff1010a61b2e11af25ec Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:40:58 -0700 Subject: [PATCH 6/9] trying "stable" on travis --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 29c342f..982b684 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,8 @@ language: julia os: - linux +julia: + - stable notifications: email: false git: From b6853dbec6756625272c7642de0e3ae3747cbe4d Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:42:56 -0700 Subject: [PATCH 7/9] travis to 1.0 and 1.1 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 982b684..c91d522 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,8 @@ language: julia os: - linux julia: - - stable + - 1.0 + - 1.1 notifications: email: false git: From 0011d6671dcc755930e44c56794bf963dc509aa6 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 12:12:20 -0700 Subject: [PATCH 8/9] fix error with observation function --- src/discrete_explicit.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl index b21a59b..6b2d93a 100644 --- a/src/discrete_explicit.jl +++ b/src/discrete_explicit.jl @@ -54,6 +54,7 @@ POMDPModelTools.ordered_observations(m::DEP) = m.o # TODO support O(s, a, sp, o) # TODO initial state distribution # TODO convert_s, etc, dimensions +# TODO better errors if T or Z return something unexpected """ DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) @@ -102,7 +103,7 @@ function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) m = DiscreteExplicitPOMDP( ss, as, os, tds, ods, - o, r, + z, r, Dict(ss[i]=>i for i in 1:length(ss)), Dict(as[i]=>i for i in 1:length(as)), Dict(os[i]=>i for i in 1:length(os)), From d331e65e33798afbd18980d7f34c808e340e0cdd Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 12:14:46 -0700 Subject: [PATCH 9/9] link to IDEAS --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 59368a3..815bf88 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. -For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md]()). +For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md](IDEAS.md)). ## Discrete Explicit Interface
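
For reference, a minimal end-to-end sketch of how a model built with this interface can be solved and simulated. It mirrors the test suite from patch 1; the tiny 2-state MDP and the always-move-right `FunctionSolver` policy below are illustrative examples, not part of the package:

```julia
using QuickPOMDPs, POMDPs, POMDPPolicies, POMDPSimulators

S = 1:2                                             # two states
A = [-1, 1]                                         # move left or right
T(s, a, sp) = sp == clamp(s + a, 1, 2) ? 1.0 : 0.0  # deterministic transitions
R(s, a) = s == 2 ? 1.0 : 0.0                        # reward for occupying state 2
m = DiscreteExplicitMDP(S, A, T, R, 0.9)

policy = solve(FunctionSolver(s -> 1), m)  # fixed policy: always move right

for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=5)
    println("s: $s, a: $a, r: $r")
end
```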