From 6a16b7cfc8201a60070369b9e046923c7ee116a4 Mon Sep 17 00:00:00 2001
From: Limmen
Date: Sun, 4 Feb 2024 15:48:34 +0100
Subject: [PATCH] pomcp

---
 .../run_cyborg_version_two_b_line.py          | 26 ++++++++-----------
 ...wo_defender_meander_cardiff_decoy_state.py |  2 +-
 .../csle_agents/agents/pomcp/pomcp_agent.py   | 25 ++++++++++++++++++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
index 37bcd335f..43cdf949b 100644
--- a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two_b_line.py
@@ -8,7 +8,6 @@
 from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
 import csle_agents.constants.constants as agents_constants
 from csle_agents.common.objective_type import ObjectiveType
-from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig
 from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
 from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
 from gym_csle_cyborg.dao.red_agent_type import RedAgentType
@@ -16,16 +15,13 @@
 
 if __name__ == '__main__':
     emulation_name = "csle-level9-040"
-    emulation_env_config = None
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
     simulation_name = "csle-cyborg-001"
-    simulation_env_config = SimulationEnvConfig(name="", version="", gym_env_name="", simulation_env_input_config="",
-                                                players_config="", joint_action_space_config="",
-                                                joint_observation_space_config="", time_step_type=None,
-                                                reward_function_config=None, transition_operator_config=None,
-                                                observation_function_config=None,
-                                                initial_state_distribution_config=None, env_parameters_config=None,
-                                                plot_transition_probabilities=False, plot_observation_function=False,
-                                                plot_reward_function=False, descr="", state_space_config=None)
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     eval_env_config = CSLECyborgConfig(
         gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
         maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
@@ -37,7 +33,7 @@
     csle_cyborg_env = CyborgScenarioTwoWrapper(config=simulation_env_config.simulation_env_input_config)
     A = csle_cyborg_env.get_action_space()
     initial_particles = csle_cyborg_env.initial_particles
-    rollout_policy = MetastoreFacade.get_ppo_policy(id=1)
+    rollout_policy = MetastoreFacade.get_ppo_policy(id=10)
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
     #                             "ppo_test_1706439955.8221297/ppo_model2900_1706522984.6982665.zip")
     # rollout_policy.save_path = ("/Users/kim/workspace/csle/examples/training/pomcp/cyborg_scenario_two_wrapper/"
@@ -53,7 +49,7 @@
         agent_type=AgentType.POMCP,
         log_every=1,
         hparams={
-            agents_constants.POMCP.N: HParam(value=5000, name=agents_constants.POMCP.N,
+            agents_constants.POMCP.N: HParam(value=20, name=agents_constants.POMCP.N,
                                              descr="the number of episodes"),
            agents_constants.POMCP.OBJECTIVE_TYPE: HParam(
                value=ObjectiveType.MAX, name=agents_constants.POMCP.OBJECTIVE_TYPE,
@@ -82,7 +78,7 @@
                descr="boolean flag indicating whether the action space should be pruned or not"),
            agents_constants.POMCP.PRUNE_SIZE: HParam(
                value=3, name=agents_constants.POMCP.PRUNE_ACTION_SPACE, descr="size of the pruned action space"),
-            agents_constants.POMCP.MAX_PARTICLES: HParam(value=5000, name=agents_constants.POMCP.MAX_PARTICLES,
+            agents_constants.POMCP.MAX_PARTICLES: HParam(value=1000, name=agents_constants.POMCP.MAX_PARTICLES,
                                                          descr="the maximum number of belief particles"),
            agents_constants.POMCP.MAX_PLANNING_DEPTH: HParam(
                value=100, name=agents_constants.POMCP.MAX_PLANNING_DEPTH, descr="the maximum depth for planning"),
@@ -95,7 +91,7 @@
            agents_constants.POMCP.USE_ROLLOUT_POLICY: HParam(
                value=True, name=agents_constants.POMCP.USE_ROLLOUT_POLICY,
                descr="boolean flag indicating whether rollout policy should be used"),
-            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=1, name=agents_constants.POMCP.PRIOR_WEIGHT,
+            agents_constants.POMCP.PRIOR_WEIGHT: HParam(value=10, name=agents_constants.POMCP.PRIOR_WEIGHT,
                                                         descr="the weight on the prior"),
            agents_constants.POMCP.PRIOR_CONFIDENCE: HParam(value=0, name=agents_constants.POMCP.PRIOR_CONFIDENCE,
                                                            descr="the prior confidence"),
@@ -133,6 +129,6 @@
         player_type=PlayerType.DEFENDER, player_idx=0
     )
     agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
-                       experiment_config=experiment_config, save_to_metastore=False)
+                       experiment_config=experiment_config, save_to_metastore=True)
     experiment_execution = agent.train()
     MetastoreFacade.save_experiment_execution(experiment_execution)
diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
index 1d1ef480c..b4001c3bb 100644
--- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
+++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
@@ -27,7 +27,7 @@
                 value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                 descr="neurons per hidden layer of the policy network"),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=2, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                value=1, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                 descr="number of layers of the policy network"),
             agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                 value=2048, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
index 2e31b2cbd..0e7cfd0dd 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_agent.py
@@ -74,6 +74,14 @@ def train(self) -> ExperimentExecution:
             exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON] = []
             exp_result.all_metrics[seed][agents_constants.COMMON.RUNTIME] = []
 
+        eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+        initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+        rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+        value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1
         # Initialize training job
         if self.training_job is None:
             emulation_name = ""
@@ -109,6 +117,11 @@ def train(self) -> ExperimentExecution:
             exp_execution_id = MetastoreFacade.save_experiment_execution(self.exp_execution)
             self.exp_execution.id = exp_execution_id
 
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
+
         for seed in self.experiment_config.random_seeds:
             ExperimentUtil.set_seed(seed)
             exp_result = self.pomcp(exp_result=exp_result, seed=seed, training_job=self.training_job,
@@ -152,8 +165,16 @@ def train(self) -> ExperimentExecution:
         self.exp_execution.timestamp = ts
         self.exp_execution.result = exp_result
         if self.save_to_metastore:
+            eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
+            initial_particles = self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value
+            rollout_policy = self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value
+            value_function = self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value
             MetastoreFacade.update_experiment_execution(experiment_execution=self.exp_execution,
                                                         id=self.exp_execution.id)
+            self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = eval_env_config
+            self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = initial_particles
+            self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = rollout_policy
+            self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = value_function
         return self.exp_execution
 
     def hparam_names(self) -> List[str]:
@@ -217,6 +238,10 @@ def pomcp(self, exp_result: ExperimentResult, seed: int,
         eval_env_name = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_NAME].value
         eval_env_config = self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value
         eval_env: BaseEnv = gym.make(eval_env_name, config=eval_env_config)
+        self.experiment_config.hparams[agents_constants.POMCP.EVAL_ENV_CONFIG].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.INITIAL_PARTICLES].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.ROLLOUT_POLICY].value = -1
+        self.experiment_config.hparams[agents_constants.POMCP.VALUE_FUNCTION].value = -1
 
         # Run N episodes
         returns = []
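
The pomcp_agent.py changes above all apply the same stash-and-restore idea: hyperparameter values that cannot be serialized to the metastore (the evaluation environment config, the initial particles, the rollout policy, and the value function) are copied into local variables and replaced with the placeholder -1 before the experiment configuration is persisted, then written back afterwards. The following standalone sketch illustrates that pattern in isolation; it is a hypothetical example, and the HParam container, save_config, and save_with_placeholders names are stand-ins rather than the csle API.

from dataclasses import dataclass
from typing import Any, Dict, List
import json


@dataclass
class HParam:
    """Hypothetical stand-in for a hyperparameter container with a mutable value."""
    value: Any
    name: str
    descr: str = ""


def save_config(hparams: Dict[str, HParam]) -> str:
    """Hypothetical persistence step; fails if any hparam value is not JSON-serializable."""
    return json.dumps({k: v.value for k, v in hparams.items()})


def save_with_placeholders(hparams: Dict[str, HParam], non_serializable: List[str]) -> str:
    """Stash non-serializable values, persist the config with -1 placeholders, then restore them."""
    stashed = {k: hparams[k].value for k in non_serializable}
    try:
        for k in non_serializable:
            hparams[k].value = -1  # same placeholder value the patch uses
        return save_config(hparams)
    finally:
        for k, v in stashed.items():
            hparams[k].value = v  # restore the original objects for later use


if __name__ == '__main__':
    hparams = {
        "N": HParam(value=20, name="N", descr="number of episodes"),
        "rollout_policy": HParam(value=object(), name="rollout_policy", descr="not JSON-serializable"),
    }
    print(save_with_placeholders(hparams, non_serializable=["rollout_policy"]))
    assert not isinstance(hparams["rollout_policy"].value, int)  # original value was restored

Wrapping the restore in a finally block (rather than the straight-line save/restore sequence the patch uses) additionally keeps the in-memory configuration intact if the write raises; either way, the key point is that the placeholder only exists for the duration of the metastore write.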