Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An implementation of CC-POMCP #68

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
034cd45
Initial commit for Vector, GenericResponse, RewardCost, and an initial…
troiwill Mar 27, 2024
5b3321a
Updated ignore file.
troiwill Apr 16, 2024
9223f94
Added CCPOMCP algorithm and dependencies; added test script for Vecto…
troiwill Apr 16, 2024
0571618
Fixed error.
troiwill Apr 16, 2024
954c404
Updated code to improve speed.
troiwill Apr 17, 2024
a6610eb
Removed complex way of handling null responses.
troiwill Apr 18, 2024
6dbcbf7
Implemented NumPy vectors and reduced Python references.
troiwill Apr 18, 2024
62fa04b
Updated and added tests.
troiwill Apr 18, 2024
bd26db7
Added example problem for rocksample for CCPOMCP.
troiwill Apr 18, 2024
2e68bfb
Added profiling for cython.
troiwill Apr 18, 2024
555bb68
Limited nsteps for profiling.
troiwill Apr 18, 2024
c759d42
Limited nsteps for profiling.
troiwill Apr 18, 2024
e899754
Added code for profiling.
troiwill Apr 18, 2024
5ddaaae
Removed except * from c functions.
troiwill Apr 18, 2024
0b88307
Minor additions.
troiwill Apr 18, 2024
dd4705f
Added profiling.
troiwill Apr 18, 2024
b946db0
Added profiling.
troiwill Apr 18, 2024
f166ad8
Minor changes.
troiwill Apr 18, 2024
fa8dac1
Added the comments to function calls.
troiwill Apr 18, 2024
19e779a
Removed except * from function names.
troiwill Apr 18, 2024
a7e666c
Added _create_qnode function to reduce code.
troiwill Apr 19, 2024
1a99ff7
Minor update.
troiwill Apr 19, 2024
57c0568
Removed unneeded test.
troiwill Apr 19, 2024
ce008e8
Added code comments.
troiwill Apr 19, 2024
915b4c0
Removed profiling.
troiwill Apr 19, 2024
900b6a8
Removed profiling code.
troiwill Apr 19, 2024
e06d4ec
Merged ccpomcp-fast-greedy.
troiwill Apr 19, 2024
cc2e218
Changed nsteps to 100.
troiwill Apr 19, 2024
cad3e92
Corrected the description for the Response class.
troiwill Apr 19, 2024
835449d
Removed print statement used for debugging.
troiwill Apr 19, 2024
f90d0d7
Merge remote-tracking branch 'upstream/main' into ccpomcp-fix-ci
May 6, 2024
cf33420
Fixed issue with missing numpy dependency during pip install.
troiwill May 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,8 @@ Thumbs.db
*.pg

.DS_Store

# Cython debugging files
########################
*.c
*.html
6 changes: 6 additions & 0 deletions pomdp_py/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

# Framework
from pomdp_py.framework.basics import *
from pomdp_py.framework.generalization import *
from pomdp_py.framework.oopomdp import *
from pomdp_py.framework.planner import *

Expand All @@ -22,6 +23,10 @@
# Algorithms
from pomdp_py.algorithms.value_iteration import ValueIteration # Cython compiled
from pomdp_py.algorithms.value_function import value, qvalue, belief_update
from pomdp_py.algorithms.ccpomcp import (
CostModel,
CCPOMCP,
)
from pomdp_py.algorithms.pomcp import POMCP
from pomdp_py.algorithms.po_rollout import PORollout
from pomdp_py.algorithms.po_uct import (
Expand All @@ -38,3 +43,4 @@
# Templates & Utilities
from pomdp_py.utils.templates import *
from pomdp_py.utils.debugging import TreeDebugger
from pomdp_py.utils.cvec import Vector
7 changes: 6 additions & 1 deletion pomdp_py/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse

available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload"]
available_problems = ["tiger", "rocksample", "mos", "tag", "load_unload", "ccrocksample"]


def parse_args():
Expand Down Expand Up @@ -45,6 +45,11 @@ def parse_args():

main()

elif args.run.lower() == "ccrocksample":
from pomdp_py.problems.cc_rocksample.cc_rocksample_problem import main

main()

else:
print("Unrecognized pomdp: {}".format(args.run))

Expand Down
66 changes: 66 additions & 0 deletions pomdp_py/algorithms/ccpomcp.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# cython: language_level=3

from pomdp_py.algorithms.po_uct cimport QNode
from pomdp_py.algorithms.pomcp cimport POMCP, VNode
from pomdp_py.framework.basics cimport PolicyModel, Action, Agent, State, Observation
from pomdp_py.framework.generalization cimport Response
from pomdp_py.utils.cvec cimport Vector


# Declaration of the CostModel extension type. No C-level attributes or
# methods are declared for it in this file.
cdef class CostModel:
pass


# QNode subclass that adds two Vector-typed C attributes.
# NOTE(review): judging by the names alone, these hold a cost value and an
# averaged cost value for the node -- confirm against the implementation.
cdef class CCQNode(QNode):
cdef Vector _cost_value
cdef Vector _avg_cost_value


# Plain record type with one double and two Vector attributes.
# NOTE(review): the field names mirror the Vector attributes declared on
# CCQNode plus a probability; presumably one record is kept per action --
# confirm against the implementation.
cdef class _CCPolicyActionData:
cdef double _prob
cdef Vector _cost_value
cdef Vector _avg_cost_value


# PolicyModel subclass whose C-level state is a dict keyed by Action with
# _CCPolicyActionData values, plus a double named _prob_sum.
# Only signatures are declared here; the bodies live in the implementation.
cdef class _CCPolicyModel(PolicyModel):
cdef dict[Action, _CCPolicyActionData] _data
# presumably the running sum of the probabilities passed to add() -- TODO confirm
cdef double _prob_sum

# C-only helper returning a boolean (bint).
cdef bint _total_prob_is_not_one(_CCPolicyModel self)
# Python-visible (cpdef) mutators; both return nothing.
cpdef void add(_CCPolicyModel self, Action action, double prob, CCQNode node)
cpdef void clear(_CCPolicyModel self)
# Python-visible lookups that take an Action and return a Vector.
cpdef Vector action_avg_cost(_CCPolicyModel self, Action action)
cpdef Vector action_cost_value(_CCPolicyModel self, Action action)
# NOTE(review): probability() is declared to return a C `float`, while the
# probability-related fields in this file (_prob, _prob_sum) are `double` --
# confirm the narrower return type is intended.
cdef public float probability(_CCPolicyModel self, Action action, State state)
cdef public Action sample(_CCPolicyModel self, State state)


# POMCP subclass declaring the C-level attributes and method signatures of
# the CC-POMCP planner. Only declarations appear here; the bodies live in
# the implementation.
# NOTE(review): what each scalar parameter (_r_diff, _tau, _alpha_n, _nu)
# means cannot be determined from this file -- document them from the
# implementation.
cdef class CCPOMCP(POMCP):
cdef double _r_diff
cdef double _tau
cdef double _alpha_n
# Vector-typed state; presumably sized by _n_constraints -- TODO confirm.
cdef Vector _lambda
cdef Vector _cost_constraint
cdef Response _null_response
# Boolean switches (bint).
cdef bint _use_random_lambda
cdef bint _clip_lambda
cdef double _nu
cdef list[float] _cost_value_init
cdef unsigned int _n_constraints
# Buffers
cdef Vector _Q_lambda, _Action_UCB
cdef _CCPolicyModel _greedy_policy_model

# Python-visible entry point: takes an Agent and returns an Action.
cpdef public Action plan(CCPOMCP self, Agent agent)
# `= *` marks qnode_params as optional; the default value itself is given
# in the implementation, not in this declaration.
cpdef QNode _create_qnode(self, tuple qnode_params = *)
cpdef void _greedy_policy(CCPOMCP self, VNode vnode, double explore_const, double nu)
# C-only helper; returns nothing.
cdef void _init_lambda_fn(CCPOMCP self)
# Returns a (State, Observation, Response) tuple for a state/action pair.
cpdef tuple[State, Observation, Response] _sample_generative_model(CCPOMCP self, State state, Action action)
# No return type is declared, so this returns a generic Python object.
cpdef _search(CCPOMCP self)
# Returns a Response; the signature continues on the next line.
cpdef Response _simulate(CCPOMCP self, State state, tuple history, VNode root, QNode parent,
Observation observation, int depth)
# C-only helper taking the sampled Action; returns nothing.
cdef void _update_cost_constraint(CCPOMCP self, Action sampled_action)


# Module-level C function: takes three doubles and returns a double.
# NOTE(review): the exact formula is not visible in this declaration --
# see the implementation.
cdef double _compute_visits_ratio(double visits_num, double visits_denom, double explore_const)
# Module-level C function: takes a VNode and an Action and returns a double.
# NOTE(review): presumably reads a scalar cost from the CCQNode reached via
# `action` under `node` -- confirm against the implementation.
cdef double _get_ccqnode_scalar_cost(VNode node, Action action)
Loading
Loading