Showing 11 changed files with 448 additions and 11 deletions.
22 changes: 22 additions & 0 deletions
examples/training/ppo/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,22 @@
# Proximal Policy Optimization (PPO) for POMDP

This directory contains example scripts for optimizing defender policies using PPO for the POMDP from [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345).

## Commands

To run a script, execute:
```bash
python <script_name>
```
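For example, to run the defender training script added to this directory in this commit:

```bash
python run_cyborg_scenario_two_defender.py
```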

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar
103 changes: 103 additions & 0 deletions
examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender.py
@@ -0,0 +1,103 @@
import csle_common.constants.constants as constants
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.metastore.metastore_facade import MetastoreFacade
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_agents.agents.ppo.ppo_agent import PPOAgent
import csle_agents.constants.constants as agents_constants
from csle_common.dao.training.tabular_policy import TabularPolicy

if __name__ == '__main__':
    # Fetch the emulation and simulation configurations from the metastore
    emulation_name = "csle-level9-040"
    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
    if emulation_env_config is None:
        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
    simulation_name = "csle-stopping-pomdp-defender-001"
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        raise ValueError(f"Could not find a simulation with name: {simulation_name}")

    # Configure the PPO training experiment (random seeds, logging, and hyperparameters)
    experiment_config = ExperimentConfig(
        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}ppo_test",
        title="PPO test", random_seeds=[399, 98912, 999], agent_type=AgentType.PPO,
        log_every=1,
        hparams={
            constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
                value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                descr="neurons per hidden layer of the policy network"),
            constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                descr="number of layers of the policy network"),
            agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                value=1096, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
                descr="number of steps in the environment for doing rollouts between policy updates"),
            agents_constants.COMMON.BATCH_SIZE: HParam(value=64, name=agents_constants.COMMON.BATCH_SIZE,
                                                       descr="batch size for updates"),
            agents_constants.COMMON.LEARNING_RATE: HParam(value=0.0001,
                                                          name=agents_constants.COMMON.LEARNING_RATE,
                                                          descr="learning rate for updating the policy"),
            constants.NEURAL_NETWORKS.DEVICE: HParam(value="cpu",
                                                     name=constants.NEURAL_NETWORKS.DEVICE,
                                                     descr="the device to train on (cpu or cuda:x)"),
            agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
                descr="the number of parallel environments for training"),
            agents_constants.COMMON.GAMMA: HParam(
                value=1, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
            agents_constants.PPO.GAE_LAMBDA: HParam(
                value=0.95, name=agents_constants.PPO.GAE_LAMBDA, descr="the GAE weighting term"),
            agents_constants.PPO.CLIP_RANGE: HParam(
                value=0.2, name=agents_constants.PPO.CLIP_RANGE, descr="the clip range for PPO"),
            agents_constants.PPO.CLIP_RANGE_VF: HParam(
                value=None, name=agents_constants.PPO.CLIP_RANGE_VF,
                descr="the clip range for the PPO update of the value network"),
            agents_constants.PPO.ENT_COEF: HParam(
                value=0.0, name=agents_constants.PPO.ENT_COEF,
                descr="the entropy coefficient for exploration"),
            agents_constants.PPO.VF_COEF: HParam(value=0.5, name=agents_constants.PPO.VF_COEF,
                                                 descr="the coefficient of the value network for the loss"),
            agents_constants.PPO.MAX_GRAD_NORM: HParam(
                value=0.5, name=agents_constants.PPO.MAX_GRAD_NORM, descr="the maximum allowed gradient norm"),
            agents_constants.PPO.TARGET_KL: HParam(value=None,
                                                   name=agents_constants.PPO.TARGET_KL,
                                                   descr="the target KL divergence"),
            agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
                value=int(150000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
                descr="number of timesteps to train"),
            agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY,
                                                       descr="training iterations between evaluations"),
            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                            descr="the batch size for evaluation"),
            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
                                                       descr="how frequently to save the model"),
            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
                descr="confidence interval"),
            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
                descr="the number of samples to include when computing the running average"),
            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
                                              descr="the number of stop actions")
        },
        player_type=PlayerType.DEFENDER, player_idx=0
    )

    # Fix a static (random) attacker strategy for the defender to train against;
    # row i of the lookup table gives the attacker's action distribution in state i
    simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
        player_type=PlayerType.ATTACKER,
        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
        lookup_table=[
            [0.8, 0.2],
            [1, 0],
            [1, 0]
        ],
        agent_type=AgentType.RANDOM, avg_R=-1)

    # Train the defender policy with PPO, then persist the execution and the learned policies
    agent = PPOAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                     experiment_config=experiment_config)
    experiment_execution = agent.train()
    MetastoreFacade.save_experiment_execution(experiment_execution)
    for policy in experiment_execution.result.policies.values():
        MetastoreFacade.save_ppo_policy(ppo_policy=policy)
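For orientation, the hyperparameter names above closely mirror the constructor arguments of Stable-Baselines3's PPO. The following is an illustrative sketch only (not part of this commit, and an assumption about the underlying trainer), expressing the same values directly against Stable-Baselines3 with an off-the-shelf Gym environment standing in for the CSLE simulation:

```python
# Illustrative sketch: the same hyperparameters expressed against Stable-Baselines3's PPO.
# CartPole-v1 is only a stand-in; the script above trains in the CSLE simulation environment.
from stable_baselines3 import PPO

model = PPO(
    policy="MlpPolicy",
    env="CartPole-v1",                       # stand-in environment
    n_steps=1096,                            # STEPS_BETWEEN_UPDATES
    batch_size=64,                           # BATCH_SIZE
    learning_rate=1e-4,                      # LEARNING_RATE
    gamma=1.0,                               # GAMMA
    gae_lambda=0.95,                         # GAE_LAMBDA
    clip_range=0.2,                          # CLIP_RANGE
    clip_range_vf=None,                      # CLIP_RANGE_VF
    ent_coef=0.0,                            # ENT_COEF
    vf_coef=0.5,                             # VF_COEF
    max_grad_norm=0.5,                       # MAX_GRAD_NORM
    target_kl=None,                          # TARGET_KL
    policy_kwargs=dict(net_arch=[64] * 4),   # 4 hidden layers, 64 neurons each
    device="cpu",                            # DEVICE
)
model.learn(total_timesteps=150_000)         # NUM_TRAINING_TIMESTEPS
```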
@@ -0,0 +1,11 @@
# Installs the configuration in the metastore
install:
	python config_v_001.py --install

# Uninstalls the configuration from the metastore
uninstall:
	python config_v_001.py --uninstall

# Cleans all configuration files
clean_config:
	rm -rf ./config.json
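The Makefile assumes a config_v_001.py in the same directory that accepts --install and --uninstall flags. That script is not shown in this diff; a minimal sketch of the command-line interface it implies, assuming a plain argparse setup, might look like:

```python
# Hypothetical sketch: config_v_001.py is not included in this diff; this only illustrates
# the --install/--uninstall interface that the Makefile targets above expect.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Manage the simulation configuration in the metastore")
    parser.add_argument("--install", action="store_true", help="install the configuration")
    parser.add_argument("--uninstall", action="store_true", help="uninstall the configuration")
    args = parser.parse_args()
    if args.install:
        print("Installing configuration ...")    # placeholder for the actual install logic
    elif args.uninstall:
        print("Uninstalling configuration ...")  # placeholder for the actual uninstall logic
```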
@@ -0,0 +1,26 @@
# CyBORG, CAGE Challenge 2

## Overview
<p align="center">
<img src="env.png" width="600">
</p>

## Useful commands

```bash
make install       # installs the environment in the metastore
make uninstall     # uninstalls the environment from the metastore
make clean_config  # cleans the materialized config file
```

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar