Commit: pomcp
Limmen committed Feb 4, 2024
1 parent 04c6f11 commit 6a16b7c
Showing 3 changed files with 37 additions and 16 deletions.

File 1 of 3:

@@ -8,24 +8,20 @@
 from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
 import csle_agents.constants.constants as agents_constants
 from csle_agents.common.objective_type import ObjectiveType
-from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
 from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
 from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
 from gym_csle_cyborg.dao.red_agent_type import RedAgentType
 from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig

 if __name__ == '__main__':
     emulation_name = "csle-level9-040"
-    emulation_env_config = None
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
     simulation_name = "csle-cyborg-001"
-    simulation_env_config = SimulationEnvConfig(name="", version="", gym_env_name="", simulation_env_input_config="",
-                                                players_config="", joint_action_space_config="",
-                                                joint_observation_space_config="", time_step_type=None,
-                                                reward_function_config=None, transition_operator_config=None,
-                                                observation_function_config=None,
-                                                initial_state_distribution_config=None, env_parameters_config=None,
-                                                plot_transition_probabilities=False, plot_observation_function=False,
-                                                plot_reward_function=False, descr="", state_space_config=None)
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     eval_env_config = CSLECyborgConfig(
         gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
         maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
@@ -37,7 +33,7 @@
     csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
     A = csle_cyborg_env.get_action_space()
     initial_particles = csle_cyborg_env.initial_particles
-    rollout_policy = MetastoreFacade.get_ppo_policy(id=1)
+    rollout_policy = MetastoreFacade.get_ppo_policy(id=10)
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
     #                             "ppo_test_1706439955.8221297/ppo_model2900_1706522984.6982665.zip")
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
@@ -53,7 +49,7 @@
         agent_type=AgentType.POMCP,
         log_every=1,
         hparams={
-            agents_constants.POMCP.N: HParam(value=5000, name=agents_constants.POMCP.N,
+            agents_constants.POMCP.N: HParam(value=20, name=agents_constants.POMCP.N,
                                              descr="the number of episodes"),
             agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
                 value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
@@ -82,7 +78,7 @@
descr="boolean flag indicating whether the action space should be pruned or not"),
agents_constants.POMCP.PRUNE_SIZE: HParam(
value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
agents_constants.POMCP.MAX_PARTICLES: HParam(value=5000, name=agents_constants.POMCP.MAX_PARTICLES,
agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES,
descr="the maximum number of belief particles"),
agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
value=100, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
@@ -95,7 +91,7 @@
             agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
                 value=True, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
                 descr="boolean flag indicating whether rollout policy should be used"),
-            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=1, name=agents_constants.POMCP.PRIOR_WEIGHT,
+            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=10, name=agents_constants.POMCP.PRIOR_WEIGHT,
                                                         descr="the weight on the prior"),
             agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
                                                             descr="the prior confidence"),
@@ -133,6 +129,6 @@
         player_type=PlayerType.DEFENDER, player_idx=0
     )
     agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
-                       experiment_config=experiment_config, save_to_metastore=False)
+                       experiment_config=experiment_config, save_to_metastore=True)
     experiment_execution = agent.train()
     MetastoreFacade.save_experiment_execution(experiment_execution)
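
In the first file (the POMCP example script for the CybORG scenario-two wrapper), the commit swaps the hand-built, empty SimulationEnvConfig for metastore lookups and points the rollout policy at id=10. A minimal sketch of that lookup pattern follows; the MetastoreFacade import path is assumed from other csle examples, the helper name load_artifacts is hypothetical, and the names csle-level9-040, csle-cyborg-001, and id=10 are taken from the diff above.

from csle_common.metastore.metastore_facade import MetastoreFacade  # assumed import path


def load_artifacts(emulation_name: str = "csle-level9-040",
                   simulation_name: str = "csle-cyborg-001",
                   ppo_policy_id: int = 10):
    """Resolves the emulation config, simulation config, and PPO rollout policy
    from the metastore, mirroring the updated example script."""
    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
    if emulation_env_config is None:
        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
    rollout_policy = MetastoreFacade.get_ppo_policy(id=ppo_policy_id)
    return emulation_env_config, simulation_env_config, rollout_policy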

File 2 of 3:

@@ -27,7 +27,7 @@
                 value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                 descr="neurons per hidden layer of the policy network"),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=2, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                value=1, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                 descr="number of layers of the policy network"),
             agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                 value=2048, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
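
The second file only shrinks the PPO policy network from two hidden layers to one. For reference, a standalone sketch of these two hyperparameter entries; the HParam and constants import paths are assumed from other csle code, while the constant names, values, and descriptions match the diff above.

from csle_common.dao.training.hparam import HParam  # assumed import path
import csle_common.constants.constants as constants  # assumed import path

# Policy-network size per the updated config: one hidden layer with 64 neurons.
nn_hparams = {
    constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
        value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
        descr="neurons per hidden layer of the policy network"),
    constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
        value=1, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
        descr="number of layers of the policy network"),
}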

File 3 of 3:

@@ -74,6 +74,14 @@ def train(self) -> ExperimentExecution:
             exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON] = []
             exp_result.all_metrics[seed][agents_constants.COMMON.RUNTIME] = []

+        eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+        initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+        rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+        value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1
         # Initialize training job
         if self.training_job is None:
             emulation_name = ""
@@ -109,6 +117,11 @@
             exp_execution_id = MetastoreFacade.save_experiment_execution(self.exp_execution)
             self.exp_execution.id = exp_execution_id

+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
+
         for seed in self.experiment_config.random_seeds:
             ExperimentUtil.set_seed(seed)
             exp_result = self.pomcp(exp_result=exp_result, seed=seed, training_job=self.training_job,
@@ -152,8 +165,16 @@
         self.exp_execution.timestamp = ts
         self.exp_execution.result = exp_result
         if self.save_to_metastore:
+            eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+            initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+            rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+            value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
             MetastoreFacade.update_experiment_execution(experiment_execution=self.exp_execution,
                                                         id=self.exp_execution.id)
+            self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+            self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+            self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+            self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
         return self.exp_execution

     def hparam_names(self) -> List[str]:
@@ -217,6 +238,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int,
         eval_env_name = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_NAME].value
         eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
         eval_env: BaseEnv = gym.make(eval_env_name, config=eval_env_config)
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1

         # Run N episodes
         returns = []
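
In the third file (the POMCP agent's train() and pomcp() methods), the added lines implement a stash-and-restore pattern: hyperparameter values that do not serialize cleanly to the metastore (the eval environment config, initial particles, rollout policy, and value function) are copied into local variables, replaced with a -1 placeholder before the training job and experiment execution are saved or updated, and written back afterwards. A sketch of the same idea as a reusable context manager follows; this helper is hypothetical and not part of the commit, and only the agents_constants.POMCP key names are taken from the diff.

from contextlib import contextmanager
from typing import Any, Dict, List


@contextmanager
def masked_hparams(hparams: Dict[str, Any], keys: List[str], placeholder: Any = -1):
    """Temporarily replaces the .value of the selected HParam entries with a serializable
    placeholder and restores the originals when the block exits, even on exceptions."""
    saved = {key: hparams[key].value for key in keys}
    try:
        for key in keys:
            hparams[key].value = placeholder
        yield hparams
    finally:
        for key, value in saved.items():
            hparams[key].value = value


# Hypothetical usage mirroring the train() changes above:
# with masked_hparams(self.experiment_config.hparams,
#                     [agents_constants.POMCP.EVAL_ENV_CONFIG, agents_constants.POMCP.INITIAL_PARTICLES,
#                      agents_constants.POMCP.ROLLOUT_POLICY, agents_constants.POMCP.VALUE_FUNCTION]):
#     MetastoreFacade.save_experiment_execution(self.exp_execution)

The commit achieves the same effect by saving and restoring the values inline around each metastore call.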
