csle-cyborg [WIP]
Limmen committed Jan 11, 2024
1 parent 74f580a commit 2bda03b
Showing 11 changed files with 448 additions and 11 deletions.
22 changes: 22 additions & 0 deletions examples/training/ppo/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,22 @@
# Proximal Policy Optimization (PPO) for POMDP

This directory contains example scripts for optimizing defender policies with PPO for the POMDP described in [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345).

## Commands

To run a script, execute:
```bash
python <script_name>
```

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar
@@ -0,0 +1,103 @@
import csle_common.constants.constants as constants
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.metastore.metastore_facade import MetastoreFacade
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_agents.agents.ppo.ppo_agent import PPOAgent
import csle_agents.constants.constants as agents_constants
from csle_common.dao.training.tabular_policy import TabularPolicy

if __name__ == '__main__':
    # Fetch the emulation and simulation configurations from the metastore
    emulation_name = "csle-level9-040"
    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
    if emulation_env_config is None:
        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
    simulation_name = "csle-stopping-pomdp-defender-001"
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
    # Experiment configuration with the PPO hyperparameters
    experiment_config = ExperimentConfig(
        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}ppo_test",
        title="PPO test", random_seeds=[399, 98912, 999], agent_type=AgentType.PPO,
        log_every=1,
        hparams={
            constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
                value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                descr="neurons per hidden layer of the policy network"),
            constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                descr="number of layers of the policy network"),
            agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                value=1096, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
                descr="number of steps in the environment for doing rollouts between policy updates"),
            agents_constants.COMMON.BATCH_SIZE: HParam(value=64, name=agents_constants.COMMON.BATCH_SIZE,
                                                       descr="batch size for updates"),
            agents_constants.COMMON.LEARNING_RATE: HParam(value=0.0001,
                                                          name=agents_constants.COMMON.LEARNING_RATE,
                                                          descr="learning rate for updating the policy"),
            constants.NEURAL_NETWORKS.DEVICE: HParam(value="cpu",
                                                     name=constants.NEURAL_NETWORKS.DEVICE,
                                                     descr="the device to train on (cpu or cuda:x)"),
            agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
                descr="the number of parallel environments for training"),
            agents_constants.COMMON.GAMMA: HParam(
                value=1, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
            agents_constants.PPO.GAE_LAMBDA: HParam(
                value=0.95, name=agents_constants.PPO.GAE_LAMBDA, descr="the GAE weighting term"),
            agents_constants.PPO.CLIP_RANGE: HParam(
                value=0.2, name=agents_constants.PPO.CLIP_RANGE, descr="the clip range for PPO"),
            agents_constants.PPO.CLIP_RANGE_VF: HParam(
                value=None, name=agents_constants.PPO.CLIP_RANGE_VF,
                descr="the clip range for the PPO update of the value network"),
            agents_constants.PPO.ENT_COEF: HParam(
                value=0.0, name=agents_constants.PPO.ENT_COEF,
                descr="the entropy coefficient for exploration"),
            agents_constants.PPO.VF_COEF: HParam(value=0.5, name=agents_constants.PPO.VF_COEF,
                                                 descr="the coefficient of the value network for the loss"),
            agents_constants.PPO.MAX_GRAD_NORM: HParam(
                value=0.5, name=agents_constants.PPO.MAX_GRAD_NORM, descr="the maximum allowed gradient norm"),
            agents_constants.PPO.TARGET_KL: HParam(value=None,
                                                   name=agents_constants.PPO.TARGET_KL,
                                                   descr="the target KL divergence"),
            agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
                value=int(150000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
                descr="number of timesteps to train"),
            agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY,
                                                       descr="training iterations between evaluations"),
            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                            descr="the batch size for evaluation"),
            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
                                                       descr="how frequently to save the model"),
            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
                descr="confidence interval"),
            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
                descr="the number of samples to include when computing the running average"),
            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
                                              descr="the number of stop actions")
        },
        player_type=PlayerType.DEFENDER, player_idx=0
    )
    # Static random attacker strategy, represented as a stationary tabular policy
    simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
        player_type=PlayerType.ATTACKER,
        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
        lookup_table=[
            [0.8, 0.2],
            [1, 0],
            [1, 0]
        ],
        agent_type=AgentType.RANDOM, avg_R=-1)
    # Train the defender policy with PPO and persist the results in the metastore
    agent = PPOAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                     experiment_config=experiment_config)
    experiment_execution = agent.train()
    MetastoreFacade.save_experiment_execution(experiment_execution)
    for policy in experiment_execution.result.policies.values():
        MetastoreFacade.save_ppo_policy(ppo_policy=policy)
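
For reference, the `lookup_table` of the random attacker strategy above appears to map each attacker state (row) to a probability distribution over the attacker's actions (columns). Below is a minimal, self-contained sketch of how such a table can be sampled from; the `sample_action` helper and the concrete state/action indices are illustrative assumptions, not part of the CSLE API:

```python
import random

# Illustrative lookup table: row index = attacker state, column index = action,
# entry = probability of taking that action in that state (each row sums to 1).
lookup_table = [
    [0.8, 0.2],  # state 0: action 0 with prob 0.8, action 1 with prob 0.2
    [1, 0],      # state 1: always action 0
    [1, 0],      # state 2: always action 0
]


def sample_action(state: int) -> int:
    """Samples an action index according to the distribution for the given state."""
    probs = lookup_table[state]
    return random.choices(range(len(probs)), weights=probs, k=1)[0]


if __name__ == '__main__':
    print([sample_action(0) for _ in range(5)])  # mostly 0s, occasionally a 1
```
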
12 changes: 12 additions & 0 deletions simulation-system/envs/Makefile
@@ -35,6 +35,9 @@ install_apt_mdp_attacker:
install_apt_pomdp_defender:
cd apt_pomdp_defender/ && $(MAKE) install

install_cyborg:
cd cyborg/ && $(MAKE) install

# Installs all simulations
install:
cd stopping_game/ && $(MAKE) install
@@ -49,6 +52,7 @@ install:
cd apt_game/ && $(MAKE) install
cd apt_mdp_attacker/ && $(MAKE) install
cd apt_pomdp_defender/ && $(MAKE) install
cd cyborg/ && $(MAKE) install

# Targets for uninstalling each individual env
uninstall_stopping_game:
@@ -87,6 +91,9 @@ uninstall_apt_mdp_attacker:
uninstall_apt_pomdp_defender:
cd apt_pomdp_defender/ && $(MAKE) uninstall

uninstall_cyborg:
cd cyborg/ && $(MAKE) uninstall

# Uninstalls all simulation envs
uninstall:
cd stopping_game/ && $(MAKE) uninstall
@@ -101,6 +108,7 @@ uninstall:
cd apt_game/ && $(MAKE) uninstall
cd apt_mdp_attacker/ && $(MAKE) uninstall
cd apt_pomdp_defender/ && $(MAKE) uninstall
cd cyborg/ && $(MAKE) uninstall

# Targets for cleaning the config each individual env
clean_config_stopping_game:
@@ -139,6 +147,9 @@ clean_config_apt_mdp_attacker:
clean_config_apt_pomdp_defender:
cd apt_pomdp_defender/ && $(MAKE) clean_config

clean_config_cyborg:
cd cyborg/ && $(MAKE) clean_config

# Cleans the materialized configuration of each emulation
clean_config:
cd stopping_game/ && $(MAKE) clean_config
@@ -153,3 +164,4 @@ clean_config:
cd apt_game/ && $(MAKE) clean_config
cd apt_mdp_attacker/ && $(MAKE) clean_config
cd apt_pomdp_defender/ && $(MAKE) clean_config
cd cyborg/ && $(MAKE) clean_config
11 changes: 11 additions & 0 deletions simulation-system/envs/cyborg/Makefile
@@ -0,0 +1,11 @@
# Installs the configuration in the metastore
install:
	python config_v_001.py --install

# Uninstalls the configuration from the metastore
uninstall:
	python config_v_001.py --uninstall

# Cleans all configuration files
clean_config:
	rm -rf ./config.json
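
The Makefile assumes that `config_v_001.py` accepts `--install` and `--uninstall` flags. A minimal sketch of what such an entry point could look like; the `install_config` and `uninstall_config` helpers are hypothetical placeholders, not the actual CSLE installation routines:

```python
import argparse


def install_config() -> None:
    # Hypothetical placeholder: register the simulation configuration in the metastore.
    print("Installing the cyborg simulation configuration...")


def uninstall_config() -> None:
    # Hypothetical placeholder: remove the simulation configuration from the metastore.
    print("Uninstalling the cyborg simulation configuration...")


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Manage the cyborg simulation configuration")
    parser.add_argument("--install", action="store_true", help="install the configuration in the metastore")
    parser.add_argument("--uninstall", action="store_true", help="uninstall the configuration from the metastore")
    args = parser.parse_args()
    if args.install:
        install_config()
    if args.uninstall:
        uninstall_config()
```
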
26 changes: 26 additions & 0 deletions simulation-system/envs/cyborg/README.MD
@@ -0,0 +1,26 @@
# CyBORG, CAGE Challenge 2

## Overview
<p align="center">
<img src="env.png" width="600">
</p>

## Useful commands

```bash
make install # installs the environment in the metastore
make uninstall # uninstalls the environment from the metastore
make clean_config # cleans the materialized config file
```
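
After `make install`, the environment should be retrievable from the metastore. A minimal sanity check, assuming the simulation is registered under a name such as `csle-cyborg-001` (a hypothetical name; substitute whatever name `config_v_001.py` actually registers):

```python
from csle_common.metastore.metastore_facade import MetastoreFacade

if __name__ == '__main__':
    simulation_name = "csle-cyborg-001"  # assumed name, not confirmed by this commit
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        print(f"Simulation '{simulation_name}' is not installed in the metastore")
    else:
        print(f"Found simulation: {simulation_env_config.name}")
```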

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar