From f1541caf94a303c7a57ce36ada890293ee4df1c9 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 31 May 2019 10:03:47 -0700 Subject: [PATCH 1/9] implemented discrete explicit models --- Project.toml | 18 +++++ REQUIRE | 1 - src/QuickPOMDPs.jl | 11 ++- src/discrete_explicit.jl | 137 ++++++++++++++++++++++++++++++++++++++ test/discrete_explicit.jl | 86 ++++++++++++++++++++++++ test/runtests.jl | 7 +- 6 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 Project.toml delete mode 100644 REQUIRE create mode 100644 src/discrete_explicit.jl create mode 100644 test/discrete_explicit.jl diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..417ad95 --- /dev/null +++ b/Project.toml @@ -0,0 +1,18 @@ +name = "QuickPOMDPs" +uuid = "8af83fb2-a731-493c-9049-9e19dbce6165" +authors = ["Zachary Sunberg "] +version = "0.1.0" + +[deps] +BeliefUpdaters = "8bb6e9a1-7d73-552c-a44a-e5dc5634aac4" +POMDPModelTools = "08074719-1b2a-587c-a292-00f91cc44415" +POMDPTesting = "92e6a534-49c2-5324-9027-86e3c861ab81" +POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d" + +[extras] +POMDPPolicies = "182e52fb-cfd0-5e46-8c26-fd0667c990f4" +POMDPSimulators = "e0d0a172-29c6-5d4e-96d0-f262df5d01fd" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test", "POMDPPolicies", "POMDPSimulators"] diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 137767a..0000000 --- a/REQUIRE +++ /dev/null @@ -1 +0,0 @@ -julia 0.6 diff --git a/src/QuickPOMDPs.jl b/src/QuickPOMDPs.jl index a14ecf7..ed17fb7 100644 --- a/src/QuickPOMDPs.jl +++ b/src/QuickPOMDPs.jl @@ -1,5 +1,14 @@ module QuickPOMDPs -# package code goes here +using POMDPs +using POMDPModelTools +using BeliefUpdaters +using POMDPTesting + +export + DiscreteExplicitPOMDP, + DiscreteExplicitMDP + +include("discrete_explicit.jl") end # module diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl new file mode 100644 index 0000000..af670c1 --- /dev/null +++ b/src/discrete_explicit.jl @@ -0,0 +1,137 @@ +struct DiscreteExplicitPOMDP{S,A,O,OF,RF} <: POMDP{S,A,O} + s::Vector{S} + a::Vector{A} + o::Vector{O} + tds::Dict{Tuple{S,A}, SparseCat{Vector{S}, Vector{Float64}}} + ods::Dict{Tuple{A,S}, SparseCat{Vector{O}, Vector{Float64}}} + ofun::OF + r::RF + smap::Dict{S,Int} + amap::Dict{A,Int} + omap::Dict{O,Int} + discount::Float64 +end + +struct DiscreteExplicitMDP{S,A,RF} <: MDP{S,A} + s::Vector{S} + a::Vector{A} + tds::Dict{Tuple{S,A}, SparseCat{Vector{S}, Vector{Float64}}} + r::RF + smap::Dict{S,Int} + amap::Dict{A,Int} + discount::Float64 +end + +const DEP = DiscreteExplicitPOMDP +const DE = Union{DiscreteExplicitPOMDP,DiscreteExplicitMDP} + +POMDPs.discount(m::DE) = m.discount +POMDPs.states(m::DE) = m.s +POMDPs.actions(m::DE) = m.a +POMDPs.n_states(m::DE) = length(m.s) +POMDPs.n_actions(m::DE) = length(m.a) +POMDPs.stateindex(m::DE, s) = m.smap[s] +POMDPs.actionindex(m::DE, a) = m.amap[a] + +POMDPs.observations(m::DEP) = m.o +POMDPs.n_observations(m::DEP) = length(m.o) +POMDPs.obsindex(m::DEP, o) = m.omap[o] +POMDPModelTools.obs_weight(m::DEP, a, sp, o) = m.ofun(a, sp, o) + +POMDPs.transition(m::DE, s, a) = m.tds[s,a] +POMDPs.observation(m::DEP, a, sp) = m.ods[a,sp] +POMDPs.reward(m::DE, s, a) = m.r(s, a) + +POMDPs.initialstate_distribution(m::DEP) = uniform_belief(m) +# XXX hack +POMDPs.initialstate_distribution(m::DiscreteExplicitMDP) = uniform_belief(FullyObservablePOMDP(m)) + +POMDPModelTools.ordered_states(m::DE) = m.s +POMDPModelTools.ordered_actions(m::DE) = m.a 
+POMDPModelTools.ordered_observations(m::DEP) = m.o + +# TODO reward(m, s, a) +# TODO support O(s, a, sp, o) +# TODO initial state distribution + +function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) + ss = vec(collect(s)) + as = vec(collect(a)) + os = vec(collect(o)) + ST = eltype(ss) + AT = eltype(as) + OT = eltype(os) + + tds = filltds(t, ss, as) + + # TODO parallelize? + ods = Dict{Tuple{AT, ST}, SparseCat{Vector{OT}, Vector{Float64}}}() + for u in as + for xp in ss + ys = OT[] + ps = Float64[] + for y in os + p = z(u, xp, y) + if p > 0.0 + push!(ys, y) + push!(ps, p) + end + end + ods[u,xp] = SparseCat(ys, ps) + end + end + + m = DiscreteExplicitPOMDP( + ss, as, os, + tds, ods, + o, r, + Dict(ss[i]=>i for i in 1:length(ss)), + Dict(as[i]=>i for i in 1:length(as)), + Dict(os[i]=>i for i in 1:length(os)), + discount + ) + + probability_check(m) + + return m +end + +function DiscreteExplicitMDP(s, a, t, r, discount) + ss = vec(collect(s)) + as = vec(collect(a)) + + tds = filltds(t, ss, as) + + m = DiscreteExplicitMDP( + ss, as, tds, r, + Dict(ss[i]=>i for i in 1:length(ss)), + Dict(as[i]=>i for i in 1:length(as)), + discount + ) + + trans_prob_consistency_check(m) + + return m +end + +function filltds(t, ss, as) + ST = eltype(ss) + AT = eltype(as) + tds = Dict{Tuple{ST, AT}, SparseCat{Vector{ST}, Vector{Float64}}}() + # TODO parallelize? + for x in ss + for u in as + xps = ST[] + ps = Float64[] + for xp in ss + p = t(x, u, xp) + if p > 0.0 + push!(xps, xp) + push!(ps, p) + end + end + tds[x,u] = SparseCat(xps, ps) + end + end + return tds +end diff --git a/test/discrete_explicit.jl b/test/discrete_explicit.jl new file mode 100644 index 0000000..d52d7ae --- /dev/null +++ b/test/discrete_explicit.jl @@ -0,0 +1,86 @@ +@testset "Discrete Explicit Tiger" begin + S = [:left, :right] + A = [:left, :right, :listen] + O = [:left, :right] + γ = 0.95 + + function T(s, a, sp) + if a == :listen + return s == sp + else # a door is opened + return 0.5 #reset + end + end + + function Z(a, sp, o) + if a == :listen + if o == sp + return 0.85 + else + return 0.15 + end + else + return 0.5 + end + end + + function R(s, a) + if a == :listen + return -1.0 + elseif s == a # the tiger was found + return -100.0 + else # the tiger was escaped + return 10.0 + end + end + + m = DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) + + solver = FunctionSolver(x->:listen) + policy = solve(solver, m) + updater = DiscreteUpdater(m) + + rsum = 0.0 + for (s,b,a,o,r) in stepthrough(m, policy, updater, "s,b,a,o,r", max_steps=10) + println("s: $s, b: $([pdf(b,s) for s in S]), a: $a, o: $o") + rsum += r + end + println("Undiscounted reward was $rsum.") + @test rsum == -10.0 +end + +@testset "Discrete Explicit MDP" begin + S = 1:5 + A = [-1, 1] + γ = 0.95 + + function T(s, a, sp) + if sp == clamp(s+a,1,5) + return 0.8 + elseif sp == clamp(s-a,1,5) + return 0.2 + else + return 0.0 + end + end + + function R(s, a) + if s == 5 + return 1.0 + else + return -1.0 + end + end + + m = DiscreteExplicitMDP(S,A,T,R,γ) + + solver = FunctionSolver(x->1) + policy = solve(solver, m) + + rsum = 0.0 + for (s,a,r) in stepthrough(m, policy, "s,a,r", max_steps=10) + println("s: $s, a: $a") + rsum += r + end + println("Undiscounted reward was $rsum.") +end diff --git a/test/runtests.jl b/test/runtests.jl index 4c363d8..c705717 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using QuickPOMDPs -using Base.Test +using Test -# write your own tests here -@test 1 == 2 +using POMDPs, POMDPPolicies, POMDPSimulators, BeliefUpdaters + 
+include("discrete_explicit.jl") From 21f88af4c0d3f8ed3cfe320b6e187f34d3f696a8 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Fri, 31 May 2019 10:27:10 -0700 Subject: [PATCH 2/9] added docstrings --- src/discrete_explicit.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl index af670c1..b21a59b 100644 --- a/src/discrete_explicit.jl +++ b/src/discrete_explicit.jl @@ -53,7 +53,25 @@ POMDPModelTools.ordered_observations(m::DEP) = m.o # TODO reward(m, s, a) # TODO support O(s, a, sp, o) # TODO initial state distribution +# TODO convert_s, etc, dimensions +""" + DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) + +Create a POMDP defined by the tuple (S,A,O,T,Z,R,γ). + +# Arguments + +- `S`,`A`,`O`: State, action, and observation spaces (typically `Vector`s) +- `T::Function`: Transition probability distribution function; ``T(s,a,s')`` is the probability of transitioning to state ``s'`` from state ``s`` after taking action ``a``. +- `Z::Function`: Observation probability distribution function; ``O(a, s', o)`` is the probability of receiving observation ``o`` when state ``s'`` is reached after action ``a``. +- `R::Function`: Reward function; ``R(s,a)`` is the reward for taking action ``a`` in state ``s``. +- `γ::Float64`: Discount factor. + +# Notes +- The default initial state distribution is uniform across all states. Changing this is not yet supported, but it can be overridden for simulations. +- Terminal states are not yet supported, but absorbing states with zero reward can be used. +""" function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) ss = vec(collect(s)) as = vec(collect(a)) @@ -96,6 +114,22 @@ function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) return m end +""" + DiscreteExplicitMDP(S,A,T,R,γ) + +Create an MDP defined by the tuple (S,A,T,R,γ). + +# Arguments + +- `S`,`A`: State and action spaces (typically `Vector`s) +- `T::Function`: Transition probability distribution function; ``T(s,a,s')`` is the probability of transitioning to state ``s'`` from state ``s`` after taking action ``a``. +- `R::Function`: Reward function; ``R(s,a)`` is the reward for taking action ``a`` in state ``s``. +- `γ::Float64`: Discount factor. + +# Notes +- The default initial state distribution is uniform across all states. Changing this is not yet supported, but it can be overridden for simulations. +- Terminal states are not yet supported, but absorbing states with zero reward can be used. +""" function DiscreteExplicitMDP(s, a, t, r, discount) ss = vec(collect(s)) as = vec(collect(a)) From eb3bc7acd36ce6af1997e52ec756b603d56a14e2 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:15:43 -0700 Subject: [PATCH 3/9] updated README --- IDEAS.md | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 128 ++++++++++++++---------------------------------------- 2 files changed, 155 insertions(+), 96 deletions(-) create mode 100644 IDEAS.md diff --git a/IDEAS.md b/IDEAS.md new file mode 100644 index 0000000..4eb42bf --- /dev/null +++ b/IDEAS.md @@ -0,0 +1,123 @@ +# QuickPOMDPs + +Eventually this will be a repository containing more simplified interfaces for expressing certain classes of POMDPs. 
The goal is for [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) to act as a low-level interface (like [MathProgBase](https://github.com/JuliaOpt/MathProgBase.jl)) and for the interface(s) defined here to act as concise and convenient high-level interfaces (like [JuMP](https://github.com/JuliaOpt/JuMP.jl) or [Convex](https://github.com/JuliaOpt/Convex.jl)).
+
+Another package that should be referenced when designing this is [PLite.jl](https://github.com/sisl/PLite.jl/blob/master/docs/README.md).
+
+Contributions of new interfaces for defining specific classes of problems are welcome!
+
+For now, there are just a few sketches of interfaces outlined below:
+
+# Interface Ideas
+
+## Basic Discrete
+
+This interface could represent any problem with discrete actions, observations, and states using the POMDPs.jl explicit interface. It would just be a tight wrapper over the POMDPs.jl interface and would look very similar to a pure POMDPs.jl implementation. Advantages over direct POMDPs.jl are that it's slightly more compact and **you don't have to understand object-oriented programming**.
+
+The Tiger problem would look like this:
+
+```julia
+pomdp = @discretePOMDP begin
+    @states [:tiger_l, :tiger_r]
+    @actions [:open_l, :open_r, :listen]
+    @observations [:tiger_l, :tiger_r]
+
+    @transition function (s, a)
+        if a == :listen
+            return [s]=>[1.0]
+        else
+            return [:tiger_l, :tiger_r]=>[0.5, 0.5] # reset
+        end
+    end
+
+    @reward Dict((:tiger_l, :open_l) => -100.,
+                 (:tiger_r, :open_r) => -100.,
+                 (:tiger_l, :open_r) => 10.,
+                 (:tiger_r, :open_l) => 10.
+                )
+
+    @default_reward -1.0
+
+    @observation function (a, sp)
+        if a == :listen
+            if sp == :tiger_l
+                return [:tiger_l, :tiger_r]=>[0.85, 0.15]
+            else
+                return [:tiger_r, :tiger_l]=>[0.85, 0.15]
+            end
+        else
+            return [:tiger_l, :tiger_r]=>[0.5, 0.5]
+        end
+    end
+
+    @initial [:tiger_l, :tiger_r]=>[0.5, 0.5]
+    @discount 0.95
+end
+```
+
+Note that this could also be done without any macros, as a constructor with keyword arguments. Perhaps that would be easier to understand?
+
+## Generative Function
+
+Another common problem is one where the dynamics are given by a function. The crying baby problem would look something like this:
+
+```julia
+pomdp = @generativePOMDP begin
+    @initial rng -> rand(rng) > 0.5
+
+    @dynamics function (s, a, rng)
+        if s # hungry
+            sp = true
+        else # not hungry
+            sp = rand(rng) < 0.1
+        end
+        if sp # hungry
+            o = rand(rng) < 0.8
+        else # not hungry
+            o = rand(rng) < 0.1
+        end
+        r = (s ? -10.0 : 0.0) + (a ? -5.0 : 0.0)
+        return sp, o, r
+    end
+
+    @discount 0.95
+end
+```
+
+Again, you could do this without macros, and just use keyword arguments.
+
+## Named Variables
+
+It might also be clearer what is going on if we declared variables with names, as shown in the example below.
+
+This would be tougher to compile, though, and it's not clear what the easiest way to express distributions or reward would be.
+
+Ideas welcome!
+
+```julia
+mdp = @MDP begin
+    xmax = 10
+    ymax = 10
+
+    @states begin
+        x in 1:xmax
+        y in 1:ymax
+    end
+
+    @actions begin
+        dir in [:up, :down, :left, :right]
+    end
+
+    @reward rdict = Dict(
+        #XXX no idea how to define this in terms of x and y
+    )
+    default_reward = 0.0
+
+    @transition #XXX what is the most concise way to define the transition distribution??
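+    # One hypothetical possibility (nothing here is implemented): let @transition take
+    # an anonymous function of the named state and action variables that returns a
+    # states=>probabilities pair, like the @discretePOMDP sketch above, e.g.
+    #
+    #     @transition function (x, y, dir)
+    #         dx, dy = Dict(:up=>(0,1), :down=>(0,-1), :left=>(-1,0), :right=>(1,0))[dir]
+    #         return [(clamp(x+dx, 1, xmax), clamp(y+dy, 1, ymax))]=>[1.0]
+    #     end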
+ + terminal = vals(reward) + discount = 0.95 + + initial +end +``` diff --git a/README.md b/README.md index 1687ef3..b155b13 100644 --- a/README.md +++ b/README.md @@ -4,124 +4,60 @@ [![Coverage Status](https://coveralls.io/repos/zsunberg/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/zsunberg/QuickPOMDPs.jl?branch=master) [![codecov.io](http://codecov.io/github/zsunberg/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/zsunberg/QuickPOMDPs.jl?branch=master) -Eventually this will be a repository containing one or more simplified interfaces for expressing certain classes of POMDPs. The goal is for [POMDPs.jl]( https://github.com/JuliaPOMDP/POMDPs.jl) to act as a low level interface (like [MathProgBase](https://github.com/JuliaOpt/MathProgBase.jl)) and for the interface(s) defined here to act as concise and convenient high-level interface (like [JuMP](https://github.com/JuliaOpt/JuMP.jl) or [Convex](https://github.com/JuliaOpt/Convex.jl)). +Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. -Another package that should be referenced when designing this is [PLite.jl](https://github.com/sisl/PLite.jl/blob/master/docs/README.md). +For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md]()). -Contributions of new interfaces for defining specific classes of problems are welcome! +## Discrete Explicit Interface -For now, there are just a few sketches of interfaces outlined below: +This interface is designed to match the standard definition of a POMDP in the literature as closely as possible. The standard definition uses the tuple (S,A,O,T,Z,R,γ) for a POMDP and (S,A,T,R,γ) for an MDP, where -# Interface Ideas +- S, A, and O are the state, action, and observation spaces, +- T and Z are the transition and observation probability distribution functions (pdfs), +- R is the reward function, and +- γ is the discount factor. -## Basic Discrete +The `DiscreteExplicitPOMDP` and `DiscreteExplicitMDP` types are provided for POMDPs and MDPs with discrete spaces and explicitly defined distributions. They should offer moderately good performance on small to medium-sized problems. -Can represent any problem with discrete actions, observations, and states using the POMDPs.jl explicit interface. This would just be a tight wrapper over the POMDPs.jl interface and would look very similar to a pure POMDPs.jl implementation. Advantages over direct POMDPs.jl are that it's slightly more compact and **you don't have to understand object-oriented programming**. +### Example -The Tiger problem would look like this: +The classic tiger POMDP [Kaelbling et al. 98](http://www.sciencedirect.com/science/article/pii/S000437029800023X) can be defined as follows: ```julia -pomdp = @discretePOMDP begin - @states [:tiger_l, :tiger_r] - @actions [:open_l, :open_r, :listen] - @observations [:tiger_l, :tiger_r] + S = [:left, :right] # S, A, and O may contain any objects + A = [:left, :right, :listen] # including user-defined types + O = [:left, :right] + γ = 0.95 - @transition function (s, a) + function T(s, a, sp) if a == :listen - return [s]=>[1.0] - else - return [TIGER_L, TIGER_R]=>[0.5, 0.5] # reset + return s == sp + else # a door is opened + return 0.5 #reset end end - @reward Dict((:tiger_l, :open_l) => -100., - (:tiger_r, :open_r) => -100., - (:tiger_l, :open_r) => 10., - (:tiger_r, :open_l) => 10. 
- ) - - @default_reward -1.0 - - @observation function (a, sp) + function Z(a, sp, o) if a == :listen - if sp == :tiger_l - return [:tiger_l, :tiger_r]=>[0.85, 0.15] + if o == sp + return 0.85 else - return [:tiger_r, :tiger_l]=>[0.85, 0.15] + return 0.15 end else - return [:tiger_l, :tiger_r]=>[0.5, 0.5] + return 0.5 end end - @initial [:tiger_l, :tiger_r]=>[0.5, 0.5] - @discount 0.95 -end -``` - -Note, this could also be done without any macros as a constructor with keyword arguments. Perhaps that would be easier to understand? - -## Generative Function - -Another common problem is one where the dynamics are given by a function. The crying baby problem would look something like this: - -```julia -pomdp = @generativePOMDP begin - @initial rng -> rand(rng) > 0.5 - - @dynamics function (s, a, rng) - if s # hungry - sp = true - else # not hungry - sp = rand(rng) < 0.1 ? true : false - end - if sp # hungry - o = rand(rng) < 0.8 ? true : false - else # not hungry - o = rand(rng) < 0.1 ? true : false + function R(s, a) + if a == :listen + return -1.0 + elseif s == a # the tiger was found + return -100.0 + else # the tiger was escaped + return 10.0 end - r = (s ? -10.0 : 0.0) + (a ? -5.0 : 0.0) - return s, o, r - end - - @discount 0.95 -end -``` - -Again, you could do this without macros, and just use keyword arguments. - -## Named Variables - -It might also be more clear what is going on if we declared variables with names as shown in the example below. - -This would be tougher to compile though, and it's not clear what the easiest way to express distributions or reward would be. - -Ideas welcome! - -```julia -mdp = @MDP begin - xmax = 10 - ymax = 10 - - @states begin - x in 1:10 - y in 1:10 - end - - @actions begin - dir in [:up, :down, :left, :right] end - @reward rdict = Dict( - #XXX no idea how to define this in terms of x and y - ) - default_reward = 0.0 - - @transition #XXX what is the most concise way to define the transition distribution?? - - terminal = vals(reward) - discount = 0.95 - - initial -end + m = DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) ``` From ad557a9ee15f59f3bd07f15e229fa25352c28abd Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:28:31 -0700 Subject: [PATCH 4/9] updated badge urls to JuliaPOMDP --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b155b13..59368a3 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # QuickPOMDPs -[![Build Status](https://travis-ci.org/zsunberg/QuickPOMDPs.jl.svg?branch=master)](https://travis-ci.org/zsunberg/QuickPOMDPs.jl) -[![Coverage Status](https://coveralls.io/repos/zsunberg/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/zsunberg/QuickPOMDPs.jl?branch=master) -[![codecov.io](http://codecov.io/github/zsunberg/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/zsunberg/QuickPOMDPs.jl?branch=master) +[![Build Status](https://travis-ci.org/JuliaPOMDP/QuickPOMDPs.jl.svg?branch=master)](https://travis-ci.org/JuliaPOMDP/QuickPOMDPs.jl) +[![Coverage Status](https://coveralls.io/repos/JuliaPOMDP/QuickPOMDPs.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaPOMDP/QuickPOMDPs.jl?branch=master) +[![codecov.io](http://codecov.io/github/JuliaPOMDP/QuickPOMDPs.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaPOMDP/QuickPOMDPs.jl?branch=master) Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. 
From 81e86ed89502023529d4d5509e64fc90550c07be Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:38:46 -0700 Subject: [PATCH 5/9] update travis --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 581f4ed..29c342f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,10 +2,6 @@ language: julia os: - linux - - osx -julia: - - 0.6 - - nightly notifications: email: false git: From 9a14aaa370287a6f6accff1010a61b2e11af25ec Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:40:58 -0700 Subject: [PATCH 6/9] trying "stable" on travis --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 29c342f..982b684 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,8 @@ language: julia os: - linux +julia: + - stable notifications: email: false git: From b6853dbec6756625272c7642de0e3ae3747cbe4d Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 11:42:56 -0700 Subject: [PATCH 7/9] travis to 1.0 and 1.1 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 982b684..c91d522 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,8 @@ language: julia os: - linux julia: - - stable + - 1.0 + - 1.1 notifications: email: false git: From 0011d6671dcc755930e44c56794bf963dc509aa6 Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 12:12:20 -0700 Subject: [PATCH 8/9] fix error with observation function --- src/discrete_explicit.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/discrete_explicit.jl b/src/discrete_explicit.jl index b21a59b..6b2d93a 100644 --- a/src/discrete_explicit.jl +++ b/src/discrete_explicit.jl @@ -54,6 +54,7 @@ POMDPModelTools.ordered_observations(m::DEP) = m.o # TODO support O(s, a, sp, o) # TODO initial state distribution # TODO convert_s, etc, dimensions +# TODO better errors if T or Z return something unexpected """ DiscreteExplicitPOMDP(S,A,O,T,Z,R,γ) @@ -102,7 +103,7 @@ function DiscreteExplicitPOMDP(s, a, o, t, z, r, discount) m = DiscreteExplicitPOMDP( ss, as, os, tds, ods, - o, r, + z, r, Dict(ss[i]=>i for i in 1:length(ss)), Dict(as[i]=>i for i in 1:length(as)), Dict(os[i]=>i for i in 1:length(os)), From d331e65e33798afbd18980d7f34c808e340e0cdd Mon Sep 17 00:00:00 2001 From: Zachary Sunberg Date: Sat, 1 Jun 2019 12:14:46 -0700 Subject: [PATCH 9/9] link to IDEAS --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 59368a3..815bf88 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Simplified Interface for specifying [POMDPs.jl](https://github.com/JuliaPOMDP/POMDPs.jl) models. -For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md]()). +For now there is only one interface (Discrete Explicit), but more may be added (see [IDEAS.md](IDEAS.md)). ## Discrete Explicit Interface
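
For reference, a minimal end-to-end sketch of how a model built with this interface can be solved and simulated. It mirrors the test suite from patch 1; the tiny 2-state MDP and the always-move-right `FunctionSolver` policy below are illustrative examples, not part of the package:

```julia
using QuickPOMDPs, POMDPs, POMDPPolicies, POMDPSimulators

S = 1:2                                             # two states
A = [-1, 1]                                         # move left or right
T(s, a, sp) = sp == clamp(s + a, 1, 2) ? 1.0 : 0.0  # deterministic transitions
R(s, a) = s == 2 ? 1.0 : 0.0                        # reward for occupying state 2
m = DiscreteExplicitMDP(S, A, T, R, 0.9)

policy = solve(FunctionSolver(s -> 1), m)  # fixed policy: always move right

for (s, a, r) in stepthrough(m, policy, "s,a,r", max_steps=5)
    println("s: $s, a: $a, r: $r")
end
```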