major changes

JuliaPOMDP · Mar 26, 2024 · 79c0726 · 79c0726
1 parent f733630
commit 79c0726
Show file tree

Hide file tree

Showing 11 changed files with 143 additions and 98 deletions.
diff --git a/Project.toml b/Project.toml
@@ -11,11 +11,13 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LocalApproximationValueIteration = "a40420fb-f401-52da-a663-f502e5b95060"
 LocalFunctionApproximation = "db97f5ab-fc25-52dd-a8f9-02a257c35074"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
+MCTS = "e12ccd36-dcad-5f33-8774-9175229e7b33"
 MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
 POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca"
 POMDPTools = "7588e00f-9cae-40de-98dc-e0c70c48cdd7"
 POMDPs = "a93abf59-7444-517b-a68a-c42f96afdd7d"
+ParticleFilters = "c8b314e2-9260-5cf8-ae76-3be7461ca6d0"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 

diff --git a/src/CompressedBeliefMDPs.jl b/src/CompressedBeliefMDPs.jl
@@ -4,6 +4,7 @@ using Infiltrator
 
 using POMDPs
 using POMDPTools
+using ParticleFilters
 using LocalApproximationValueIteration
 using LocalFunctionApproximation
 
@@ -26,8 +27,9 @@ export
     MultivariateStatsCompressor,
     PCACompressor,
     KernelPCACompressor,
-    PPCACompressor
-    # FactorAnalysisCompressor  # TODO: debug
+    PPCACompressor,
+    FactorAnalysisCompressor,
+    MDSCompressor
 include("compressors/compressor.jl")
 include("compressors/mv_stats.jl")
 
@@ -36,7 +38,7 @@ export
 include("sampler.jl")
 
 export
-    CompressedBeliefMDP,
+    CompressedBeliefMDP
 include("cbmdp.jl")
 
 export

diff --git a/src/cbmdp.jl b/src/cbmdp.jl
@@ -1,7 +1,7 @@
 struct CompressedBeliefMDP{B, A} <: MDP{B, A}
     bmdp::GenerativeBeliefMDP
     compressor::Compressor
-    ϕ::Bijection  # ϕ: belief ↦ compress(compressor, belief); NOTE: While compressions aren't usually injective, we cache compressed beliefs on a first-come, first-served basis, so the *cache* is effectively bijective.
+    ϕ::Bijection  # ϕ: belief ↦ compressor(belief); NOTE: While compressions aren't usually injective, we cache compressed beliefs on a first-come, first-served basis, so the *cache* is effectively bijective.
 end
 
 
@@ -10,7 +10,7 @@ function CompressedBeliefMDP(pomdp::POMDP, updater::Updater, compressor::Compres
     # Hack to determine typeof(b̃)
     bmdp = GenerativeBeliefMDP(pomdp, updater)
     b = initialstate(bmdp).val
-    b̃ = compress(compressor, convert_s(AbstractVector{Float64}, b, bmdp.pomdp))
+    b̃ = compressor(convert_s(AbstractVector{Float64}, b, bmdp.pomdp))
     B = typeof(b)
     B̃ = typeof(b̃)
     ϕ = Bijection{B, B̃}()
@@ -23,10 +23,14 @@ function decode(m::CompressedBeliefMDP, b̃)
 end
 
 function encode(m::CompressedBeliefMDP, b)
-    b = convert_s(AbstractVector{Float64}, b, m)
-    b̃ = get!(m.ϕ, b) do
-        b = convert_s(AbstractArray{Float64}, b, m)  # TODO: not sure if I need a `let b = ...` here
-        compress(m.compressor, b)  # NOTE: compress is only called if b ∉ domain(m.ϕ)
+    if b ∈ domain(m.ϕ)
+        b̃ = m.ϕ[b]
+    else
+        b_numerical = convert_s(AbstractArray{Float64}, b, m)
+        b̃ = m.compressor(b_numerical)
+        if b̃ ∉ image(m.ϕ)
+            m.ϕ[b] = b̃
+        end
     end
     return b̃
 end
@@ -38,14 +42,17 @@ function POMDPs.gen(m::CompressedBeliefMDP, b̃, a, rng::Random.AbstractRNG)
     return (sp=b̃p, r=r)
 end
 
+# TODO: handle sampling terminal states /Users/logan/.julia/packages/POMDPTools/7Rekv/src/ModelTools/generative_belief_mdp.jl
+
 # TODO: use macro forwarding
 # TODO: read about orthogonalized code on julia documetation
+POMDPs.states(m::CompressedBeliefMDP) = [encode(m, initialize_belief(m.bmdp.updater, s)) for s in states(m.bmdp.pomdp)]
+POMDPs.initialstate(m::CompressedBeliefMDP) = encode(m, initialstate(m.bmdp))
 POMDPs.actions(m::CompressedBeliefMDP, b̃) = actions(m.bmdp, decode(m, b̃))
 POMDPs.actions(m::CompressedBeliefMDP) = actions(m.bmdp)
+POMDPs.actionindex(m::CompressedBeliefMDP, a) = actionindex(m.bmdp.pomdp, a)
 POMDPs.isterminal(m::CompressedBeliefMDP, b̃) = isterminal(m.bmdp, decode(m, b̃))
 POMDPs.discount(m::CompressedBeliefMDP) = discount(m.bmdp)
-POMDPs.initialstate(m::CompressedBeliefMDP) = encode(m, initialstate(m.bmdp))
-POMDPs.actionindex(m::CompressedBeliefMDP, a) = actionindex(m.bmdp.pomdp, a)
 
 POMDPs.convert_s(t::Type, s, m::CompressedBeliefMDP) = convert_s(t, s, m.bmdp.pomdp)
 POMDPs.convert_s(t::Type{<:AbstractArray}, s::AbstractArray, m::CompressedBeliefMDP) = convert_s(t, s, m.bmdp.pomdp)  # NOTE: this second implementation is b/c to get around a requirement from POMDPLinter
@@ -54,7 +61,6 @@ POMDPs.convert_s(t::Type{<:AbstractArray}, s::AbstractArray, m::CompressedBelief
 ExplicitDistribution = Union{SparseCat, BoolDistribution, Deterministic, Uniform}  # distributions w/ explicit PDFs from POMDPs.jl (https://juliapomdp.github.io/POMDPs.jl/latest/POMDPTools/distributions/#Implemented-Distributions)
 POMDPs.convert_s(::Type{<:AbstractArray}, s::ExplicitDistribution, m::POMDP) = [pdf(s, x) for x in states(m)]
 
-
 # function POMDPs.convert_s(t::Type{V}, s, m::CompressedBeliefMDP) where V<:AbstractArray
 #     convert_s(t, s, m.bmdp.pomdp)
 # end

diff --git a/src/compressors/compressor.jl b/src/compressors/compressor.jl
@@ -1,30 +1,8 @@
-"""
-Base type for an MDP/POMDP belief compression.
-"""
 abstract type Compressor end
 
-
 """
     fit!(compressor::Compressor, beliefs)
 
 Fit the compressor to beliefs.
 """
-function fit! end
-
-
-"""
-    compress(compressor::Compressor, beliefs)
-
-Compress the sampled beliefs using method associated with compressor, and returns a compressed representation.
-"""
-function compress end
-
-
-"""
-    decompress(compressor::Compressor, compressed)
-
-Decompress the compressed beliefs using method associated with compressor, and returns the reconstructed beliefs.
-"""
-function decompress end
-
-# TODO: remove decompress and make compress a functor (https://docs.julialang.org/en/v1/manual/methods/#Note-on-Optional-and-keyword-Arguments)
+function fit! end
diff --git a/src/compressors/manifold_learning.jl b/src/compressors/manifold_learning.jl
@@ -0,0 +1,5 @@
+"""
+Wrapper for ManifoldLearning.jl. See https://wildart.github.io/ManifoldLearning.jl/stable/.
+"""
+
+# TODO
diff --git a/src/compressors/mv_stats.jl b/src/compressors/mv_stats.jl
@@ -1,34 +1,30 @@
+"""
+Wrappers for MultivariateStats.jl. See https://juliastats.org/MultivariateStats.jl/stable/.
+"""
+
 using MultivariateStats
 
-mutable struct MultivariateStatsCompressor{T<:MultivariateStats.AbstractDimensionalityReduction} <: Compressor
+
+mutable struct MVSCompressor{T<:MultivariateStats.AbstractDimensionalityReduction} <: Compressor
     const maxoutdim::Integer
-    M  # TODO: check if this is Julian (how to replace unde)
+    M
 end
 
-function fit!(compressor::MultivariateStatsCompressor{T}, beliefs) where T<:MultivariateStats.AbstractDimensionalityReduction
-    compressor.M = MultivariateStats.fit(T, beliefs'; maxoutdim=compressor.maxoutdim)
-end
+(c::MVSCompressor)(beliefs) = ndims(beliefs) == 2 ? MultivariateStats.predict(c.M, beliefs')' : vec(MultivariateStats.predict(c.M, beliefs))
 
-# TODO: is there a way to solve this w/ multiple dispatch? clean up
-function compress(compressor::MultivariateStatsCompressor, beliefs)
-    # TODO: is there better way to do this?
-    return ndims(beliefs) == 2 ? predict(compressor.M, beliefs')' : vec(predict(compressor.M, beliefs))
+function fit!(compressor::MVSCompressor{T}, beliefs) where T<:MultivariateStats.AbstractDimensionalityReduction
+    compressor.M = MultivariateStats.fit(T, beliefs'; maxoutdim=compressor.maxoutdim)
 end
 
-decompress(compressor::MultivariateStatsCompressor, compressed) = MultivariateStats.reconstruct(compressor.M, compressed)
-
-MultivariateStatsCompressor(maxoutdim::Integer, T) = MultivariateStatsCompressor{T}(maxoutdim, nothing)
+MVSCompressor(maxoutdim::Integer, T) = MVSCompressor{T}(maxoutdim, nothing)
 
 # PCA Compressors
-PCACompressor(maxoutdim::Integer) = MultivariateStatsCompressor(maxoutdim, PCA)
-KernelPCACompressor(maxoutdim::Integer) = MultivariateStatsCompressor(maxoutdim, KernelPCA)
-PPCACompressor(maxoutdim::Integer) = MultivariateStatsCompressor(maxoutdim, PPCA)
-
-# TODO: debug this
-function fit!(compressor::MultivariateStatsCompressor{KernelPCA}, beliefs)
-    compressor.M = MultivariateStats.fit(KernelPCA, beliefs'; maxoutdim=compressor.maxoutdim, inverse=true)
-end
+PCACompressor(maxoutdim::Integer) = MVSCompressor(maxoutdim, PCA)
+KernelPCACompressor(maxoutdim::Integer) = MVSCompressor(maxoutdim, KernelPCA)
+PPCACompressor(maxoutdim::Integer) = MVSCompressor(maxoutdim, PPCA)
 
 # Factor Analysis Compressor
-FactorAnalysisCompressor(maxoutdim::Integer) = MultivariateStatsCompressor(maxoutdim, FactorAnalysis)
+FactorAnalysisCompressor(maxoutdim::Integer) = MVSCompressor(maxoutdim, FactorAnalysis)
 
+# Multidimensional Scaling
+MDSCompressor(maxoutdim::Integer) = MVSCompressor(maxoutdim, MDS)
diff --git a/src/sampler.jl b/src/sampler.jl
@@ -7,4 +7,9 @@ end
 
 function sample(pomdp::POMDP, policy::ExplorationPolicy, updater::Updater, n::Integer)
     # TODO:
-end
+end
+
+
+"""
+Adapted from algorithm 21.13 from AFDM
+"""
diff --git a/src/solver.jl b/src/solver.jl
@@ -1,41 +1,81 @@
-struct CompressedBeliefSolver <: Solver
-    explorer::Union{Policy, ExplorationPolicy}
-    updater::Updater
-    compressor::Compressor
-    base_solver::Solver
-    n::Integer
+### POLICY ###
+
+struct CompressedBeliefPolicy <: POMDPs.Policy
+    m::CompressedBeliefMDP
+    base_policy::Policy
 end
 
-function CompressedBeliefSolver(
-    explorer::Union{Policy, ExplorationPolicy},
-    updater::Updater,
-    compressor::Compressor,
-    base_solver::Solver;
-    n=100
-)
-    return CompressedBeliefSolver(explorer, updater, compressor, base_solver, n)
+function POMDPs.action(p::CompressedBeliefPolicy, s)
+    b = initialize_belief(p.m.bmdp.updater, s)
+    action(p.base_policy, encode(p.m, b))
 end
 
-# TODO: make compressed solver that infers everything 
-# TODO: make compressed solver that uses local FA solver
+function POMDPs.value(p::CompressedBeliefPolicy, s)
+    b = initialize_belief(p.m.bmdp.updater, s)
+    value(p.base_policy, encode(p.m, b))
+end
+
+POMDPs.updater(p::CompressedBeliefPolicy) = p.m.bmdp.updater
 
-struct CompressedBeliefPolicy <: Policy
+### SOLVER ###
+
+struct CompressedBeliefSolver <: Solver
     m::CompressedBeliefMDP
-    base_policy::Policy
+    base_solver::Solver
 end
 
-POMDPs.action(p::CompressedBeliefPolicy, b) = action(p.base_policy, encode(m, b))
-POMDPs.value(p::CompressedBeliefPolicy, b) = value(p.base_policy, encode(m, b))
-POMDPs.updater(p::CompressedBeliefPolicy) = p.m.bmdp.updater
+# TODO: add seeding
+function CompressedBeliefSolver(
+    pomdp::POMDP;
+    explorer::Union{Policy, ExplorationPolicy}=RandomPolicy(pomdp),
+    updater::Updater=applicable(POMDPs.states, pomdp) ? DiscreteUpdater(pomdp) : BootstrapFilter(pomdp, 5000),  # hack to determine default updater, may select incompatible Updater
+    compressor::Compressor=PCACompressor(1), 
+    n::Integer=50,  # max number of belief samples to compress
+    interp::Union{Nothing, LocalFunctionApproximator}=nothing,
+    k=1,  # k nearest neighbors; only used if interp is nothing
+    verbose=false,
+    max_iterations=1000,  # for value iteration
+    n_generative_samples=10,  # number of steps to look ahead when calculated expected reward
+    belres::Float64=1e-3,
+)
+    # sample beliefs
+    B = sample(pomdp, explorer, updater, n)
 
-function POMDPs.solve(solver::CompressedBeliefSolver, pomdp::POMDP)
-    B = sample(pomdp, solver.explorer, solver.updater, solver.n)
+    # compress beliefs and cache mapping
     B_numerical = mapreduce(b->convert_s(AbstractArray{Float64}, b, pomdp), hcat, B)'
-    fit!(solver.compressor, B_numerical)
-    B̃ = compress(solver.compressor, B_numerical)
-    m = CompressedBeliefMDP(pomdp, solver.updater, solver.compressor)
+    fit!(compressor, B_numerical)
+    B̃ = compressor(B_numerical)
     ϕ = Dict(unique(t->t[2], zip(B, eachrow(B̃))))
-    merge!(m.ϕ, ϕ)  # update compression cache
-    base_policy = solve(solver.base_solver, m)
-    return CompressedBeliefPolicy(m, base_policy)
+
+    # construct the compressed belief-state MDP
+    m = CompressedBeliefMDP(pomdp, updater, compressor)
+    merge!(m.ϕ, ϕ)  # update the compression cache
+
+    # define the interpolator for the solver
+    if isnothing(interp)
+        data = map(row->SVector(row...), eachrow(B̃))
+        tree = KDTree(data)
+        interp = LocalNNFunctionApproximator(tree, data, k)  # TODO: check that we need this
+    end
+
+    # build the based solver
+    base_solver = LocalApproximationValueIterationSolver(
+        interp,
+        max_iterations=max_iterations,
+        belres=belres,
+        verbose=verbose,
+        is_mdp_generative=true,
+        n_generative_samples=n_generative_samples
+    )
+
+    return CompressedBeliefSolver(m, base_solver)
+end
+
+function POMDPs.solve(solver::CompressedBeliefSolver, pomdp::POMDP)
+    if solver.m.bmdp.pomdp !== pomdp
+        @warn "Got $pomdp, but solver.m.bmdp.pomdp $(solver.m.bmdp.pomdp) isn't identical"
+    end
+
+    base_policy = solve(solver.base_solver, solver.m)
+    return CompressedBeliefPolicy(solver.m, base_policy)
 end
diff --git a/test/mv_stats_tests.jl b/test/mv_stats_tests.jl
@@ -1,25 +1,25 @@
 function test_compressor(C::Function, maxoutdim::Int)
-    pomdp = TMaze(20, 0.99)
-    sampler = DiscreteRandomSampler(pomdp)
+    pomdp = BabyPOMDP()  # TODO: change to TMaze once I figure out how to properly sample
     compressor = C(maxoutdim)
-    solver = CompressedSolver(pomdp, sampler, compressor; n_samples=20, verbose=false, max_iterations=5)
-    approx_policy = solve(solver, pomdp)
+    solver = CompressedBeliefSolver(pomdp; compressor=compressor, n=20)
+    policy = solve(solver, pomdp)
     s = initialstate(pomdp)
-    _ = value(approx_policy, s)
-    _ = action(approx_policy, s)
-    return approx_policy
+    _ = action(policy, s)
+    _ = value(policy, s)
+    return policy
 end
 
 MV_STATS_COMPRESSORS = (
     PCACompressor,
     KernelPCACompressor,
     PPCACompressor,
-    # FactorAnalysisCompressor
+    FactorAnalysisCompressor,
+    MDSCompressor
 )
 
 @testset "Compressor Tests" begin
     @testset "$C" for C in MV_STATS_COMPRESSORS
-        @inferred test_compressor(C, 1)
-        @inferred test_compressor(C, 10)
+        @test_nowarn test_compressor(C, 1)
+        @test_nowarn test_compressor(C, 2)
     end
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,9 +1,11 @@
 using CompressedBeliefMDPs
 using Test
 
-using POMDPs
-using POMDPModels
+using POMDPs, POMDPModels, POMDPTools
+# TODO: also test w/ FA solver
+using MCTS
 
 @testset "CompressedBeliefMDPs.jl" begin
     include("mv_stats_tests.jl")
+    include("solver_tests.jl")
 end
diff --git a/test/solver_tests.jl b/test/solver_tests.jl
@@ -0,0 +1,9 @@
+@testset "Solver Tests" begin
+    compressor = PCACompressor(1)
+    @testset "$pomdp" for pomdp in (BabyPOMDP(), TigerPOMDP(), TMaze(6, 0.99), LightDark1D())
+        solver = CompressedBeliefSolver(pomdp; n=10)
+        @test_nowarn test_solver(solver, pomdp)
+    end
+end
+
+# TODO: add test w/ MCTS