From 6a16b7cfc8201a60070369b9e046923c7ee116a4 Mon Sep 17 00:00:00 2001
From: Limmen
Date: Sun, 4 Feb 2024 15:48:34 +0100
Subject: [PATCH] pomcp

---
 .../run_cyborg_version_two_b_line.py          | 26 ++++++++-----------
 ...wo_defender_meander_cardiff_decoy_state.py |  2 +-
 .../csle_agents/agents/pomcp/pomcp_agent.py   | 25 ++++++++++++++++++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
index 37bcd335f..43cdf949b 100644
--- a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
@@ -8,7 +8,6 @@
 from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
 import csle_agents.constants.constants as agents_constants
 from csle_agents.common.objective_type import ObjectiveType
-from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
 from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
 from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
 from gym_csle_cyborg.dao.red_agent_type import RedAgentType
@@ -16,16 +15,13 @@
 
 if __name__ == '__main__':
     emulation_name = "csle-level9-040"
-    emulation_env_config = None
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
     simulation_name = "csle-cyborg-001"
-    simulation_env_config = SimulationEnvConfig(name="", version="", gym_env_name="", simulation_env_input_config="",
-                                                players_config="", joint_action_space_config="",
-                                                joint_observation_space_config="", time_step_type=None,
-                                                reward_function_config=None, transition_operator_config=None,
-                                                observation_function_config=None,
-                                                initial_state_distribution_config=None, env_parameters_config=None,
-                                                plot_transition_probabilities=False, plot_observation_function=False,
-                                                plot_reward_function=False, descr="", state_space_config=None)
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     eval_env_config = CSLECyborgConfig(
         gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
         maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
@@ -37,7 +33,7 @@
     csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
     A = csle_cyborg_env.get_action_space()
     initial_particles = csle_cyborg_env.initial_particles
-    rollout_policy = MetastoreFacade.get_ppo_policy(id=1)
+    rollout_policy = MetastoreFacade.get_ppo_policy(id=10)
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
     #                             "ppo_test_1706439955.8221297/ppo_model2900_1706522984.6982665.zip")
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
@@ -53,7 +49,7 @@
         agent_type=AgentType.POMCP,
         log_every=1,
         hparams={
-            agents_constants.POMCP.N: HParam(value=5000, name=agents_constants.POMCP.N,
+            agents_constants.POMCP.N: HParam(value=20, name=agents_constants.POMCP.N,
                                              descr="the number of episodes"),
            agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
                value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
@@ -82,7 +78,7 @@
                descr="boolean flag indicating whether the action space should be pruned or not"),
            agents_constants.POMCP.PRUNE_SIZE: HParam(
                value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
-            agents_constants.POMCP.MAX_PARTICLES: HParam(value=5000, name=agents_constants.POMCP.MAX_PARTICLES,
+            agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES,
                                                          descr="the maximum number of belief particles"),
            agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
                value=100, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
@@ -95,7 +91,7 @@
            agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
                value=True, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
                descr="boolean flag indicating whether rollout policy should be used"),
-            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=1, name=agents_constants.POMCP.PRIOR_WEIGHT,
+            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=10, name=agents_constants.POMCP.PRIOR_WEIGHT,
                                                         descr="the weight on the prior"),
            agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
                                                            descr="the prior confidence"),
@@ -133,6 +129,6 @@
         player_type=PlayerType.DEFENDER, player_idx=0
     )
     agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
-                       experiment_config=experiment_config, save_to_metastore=False)
+                       experiment_config=experiment_config, save_to_metastore=True)
     experiment_execution = agent.train()
     MetastoreFacade.save_experiment_execution(experiment_execution)
diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
index 1d1ef480c..b4001c3bb 100644
--- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
+++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
@@ -27,7 +27,7 @@
                 value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                 descr="neurons per hidden layer of the policy network"),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=2, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                value=1, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                 descr="number of layers of the policy network"),
             agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                 value=2048, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
index 2e31b2cbd..0e7cfd0dd 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
@@ -74,6 +74,14 @@ def train(self) -> ExperimentExecution:
             exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON] = []
             exp_result.all_metrics[seed][agents_constants.COMMON.RUNTIME] = []
 
+        eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+        initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+        rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+        value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1
         # Initialize training job
         if self.training_job is None:
             emulation_name = ""
@@ -109,6 +117,11 @@ def train(self) -> ExperimentExecution:
             exp_execution_id = MetastoreFacade.save_experiment_execution(self.exp_execution)
             self.exp_execution.id = exp_execution_id
 
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
+
         for seed in self.experiment_config.random_seeds:
             ExperimentUtil.set_seed(seed)
             exp_result = self.pomcp(exp_result=exp_result, seed=seed, training_job=self.training_job,
@@ -152,8 +165,16 @@ def train(self) -> ExperimentExecution:
         self.exp_execution.timestamp = ts
         self.exp_execution.result = exp_result
         if self.save_to_metastore:
+            eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+            initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+            rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+            value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
             MetastoreFacade.update_experiment_execution(experiment_execution=self.exp_execution,
                                                         id=self.exp_execution.id)
+            self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+            self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+            self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+            self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
         return self.exp_execution
 
     def hparam_names(self) -> List[str]:
@@ -217,6 +238,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int,
         eval_env_name = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_NAME].value
         eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
         eval_env: BaseEnv = gym.make(eval_env_name, config=eval_env_config)
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1
 
         # Run N episodes
         returns = []
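
The pomcp_agent.py changes above all apply the same stash-and-restore idea: hyperparameter values that cannot be serialized to the metastore (the evaluation environment config, the initial particles, the rollout policy, and the value function) are copied into local variables and replaced with the placeholder -1 before the experiment configuration is persisted, then written back afterwards. The following standalone sketch illustrates that pattern in isolation; it is a hypothetical example, and the HParam container, save_config, and save_with_placeholders names are stand-ins rather than the csle API.

from dataclasses import dataclass
from typing import Any, Dict, List
import json


@dataclass
class HParam:
    """Hypothetical stand-in for a hyperparameter container with a mutable value."""
    value: Any
    name: str
    descr: str = ""


def save_config(hparams: Dict[str, HParam]) -> str:
    """Hypothetical persistence step; fails if any hparam value is not JSON-serializable."""
    return json.dumps({k: v.value for k, v in hparams.items()})


def save_with_placeholders(hparams: Dict[str, HParam], non_serializable: List[str]) -> str:
    """Stash non-serializable values, persist the config with -1 placeholders, then restore them."""
    stashed = {k: hparams[k].value for k in non_serializable}
    try:
        for k in non_serializable:
            hparams[k].value = -1  # same placeholder value the patch uses
        return save_config(hparams)
    finally:
        for k, v in stashed.items():
            hparams[k].value = v  # restore the original objects for later use


if __name__ == '__main__':
    hparams = {
        "N": HParam(value=20, name="N", descr="number of episodes"),
        "rollout_policy": HParam(value=object(), name="rollout_policy", descr="not JSON-serializable"),
    }
    print(save_with_placeholders(hparams, non_serializable=["rollout_policy"]))
    assert not isinstance(hparams["rollout_policy"].value, int)  # original value was restored

Wrapping the restore in a finally block (rather than the straight-line save/restore sequence the patch uses) additionally keeps the in-memory configuration intact if the write raises; either way, the key point is that the placeholder only exists for the duration of the metastore write.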