Showing 11 changed files with 448 additions and 11 deletions.
22 changes: 22 additions & 0 deletions
examples/training/ppo/cyborg_scenario_two_defender/README.md
@@ -0,0 +1,22 @@
# Proximal Policy Optimization (PPO) for POMDP

This directory contains example scripts for optimizing defender policies using PPO for the POMDP from [https://ieeexplore.ieee.org/document/9779345](https://ieeexplore.ieee.org/document/9779345).

## Commands

To run a script, execute:
```bash
python <script_name>
```
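For example, to run the defender training script added to this directory in this commit:

```bash
python run_cyborg_scenario_two_defender.py
```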

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar
103 changes: 103 additions & 0 deletions
examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender.py
@@ -0,0 +1,103 @@
import csle_common.constants.constants as constants
from csle_common.dao.training.experiment_config import ExperimentConfig
from csle_common.metastore.metastore_facade import MetastoreFacade
from csle_common.dao.training.agent_type import AgentType
from csle_common.dao.training.hparam import HParam
from csle_common.dao.training.player_type import PlayerType
from csle_agents.agents.ppo.ppo_agent import PPOAgent
import csle_agents.constants.constants as agents_constants
from csle_common.dao.training.tabular_policy import TabularPolicy

if __name__ == '__main__':
    # Fetch the emulation and simulation configurations from the metastore
    emulation_name = "csle-level9-040"
    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
    if emulation_env_config is None:
        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
    simulation_name = "csle-stopping-pomdp-defender-001"
    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
        raise ValueError(f"Could not find a simulation with name: {simulation_name}")

    # Configure the PPO training experiment (random seeds, logging, and hyperparameters)
    experiment_config = ExperimentConfig(
        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}ppo_test",
        title="PPO test", random_seeds=[399, 98912, 999], agent_type=AgentType.PPO,
        log_every=1,
        hparams={
            constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
                value=64, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
                descr="neurons per hidden layer of the policy network"),
            constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
                descr="number of layers of the policy network"),
            agents_constants.PPO.STEPS_BETWEEN_UPDATES: HParam(
                value=1096, name=agents_constants.PPO.STEPS_BETWEEN_UPDATES,
                descr="number of steps in the environment for doing rollouts between policy updates"),
            agents_constants.COMMON.BATCH_SIZE: HParam(value=64, name=agents_constants.COMMON.BATCH_SIZE,
                                                       descr="batch size for updates"),
            agents_constants.COMMON.LEARNING_RATE: HParam(value=0.0001,
                                                          name=agents_constants.COMMON.LEARNING_RATE,
                                                          descr="learning rate for updating the policy"),
            constants.NEURAL_NETWORKS.DEVICE: HParam(value="cpu",
                                                     name=constants.NEURAL_NETWORKS.DEVICE,
                                                     descr="the device to train on (cpu or cuda:x)"),
            agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
                descr="the number of parallel environments for training"),
            agents_constants.COMMON.GAMMA: HParam(
                value=1, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
            agents_constants.PPO.GAE_LAMBDA: HParam(
                value=0.95, name=agents_constants.PPO.GAE_LAMBDA, descr="the GAE weighting term"),
            agents_constants.PPO.CLIP_RANGE: HParam(
                value=0.2, name=agents_constants.PPO.CLIP_RANGE, descr="the clip range for PPO"),
            agents_constants.PPO.CLIP_RANGE_VF: HParam(
                value=None, name=agents_constants.PPO.CLIP_RANGE_VF,
                descr="the clip range for the PPO update of the value network"),
            agents_constants.PPO.ENT_COEF: HParam(
                value=0.0, name=agents_constants.PPO.ENT_COEF,
                descr="the entropy coefficient for exploration"),
            agents_constants.PPO.VF_COEF: HParam(value=0.5, name=agents_constants.PPO.VF_COEF,
                                                 descr="the coefficient of the value network for the loss"),
            agents_constants.PPO.MAX_GRAD_NORM: HParam(
                value=0.5, name=agents_constants.PPO.MAX_GRAD_NORM, descr="the maximum allowed gradient norm"),
            agents_constants.PPO.TARGET_KL: HParam(value=None,
                                                   name=agents_constants.PPO.TARGET_KL,
                                                   descr="the target KL divergence"),
            agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
                value=int(150000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
                descr="number of timesteps to train"),
            agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY,
                                                       descr="training iterations between evaluations"),
            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
                                                            descr="the batch size for evaluation"),
            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
                                                       descr="how frequently to save the model"),
            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
                descr="confidence interval"),
            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
                descr="the number of samples to include when computing the running average"),
            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
                                              descr="the number of stop actions")
        },
        player_type=PlayerType.DEFENDER, player_idx=0
    )

    # Fix a static (random) attacker strategy for the defender to train against;
    # row i of the lookup table gives the attacker's action distribution in state i
    simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
        player_type=PlayerType.ATTACKER,
        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
        lookup_table=[
            [0.8, 0.2],
            [1, 0],
            [1, 0]
        ],
        agent_type=AgentType.RANDOM, avg_R=-1)

    # Train the defender policy with PPO, then persist the execution and the learned policies
    agent = PPOAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                     experiment_config=experiment_config)
    experiment_execution = agent.train()
    MetastoreFacade.save_experiment_execution(experiment_execution)
    for policy in experiment_execution.result.policies.values():
        MetastoreFacade.save_ppo_policy(ppo_policy=policy)
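For orientation, the hyperparameter names above closely mirror the constructor arguments of Stable-Baselines3's PPO. The following is an illustrative sketch only (not part of this commit, and an assumption about the underlying trainer), expressing the same values directly against Stable-Baselines3 with an off-the-shelf Gym environment standing in for the CSLE simulation:

```python
# Illustrative sketch: the same hyperparameters expressed against Stable-Baselines3's PPO.
# CartPole-v1 is only a stand-in; the script above trains in the CSLE simulation environment.
from stable_baselines3 import PPO

model = PPO(
    policy="MlpPolicy",
    env="CartPole-v1",                       # stand-in environment
    n_steps=1096,                            # STEPS_BETWEEN_UPDATES
    batch_size=64,                           # BATCH_SIZE
    learning_rate=1e-4,                      # LEARNING_RATE
    gamma=1.0,                               # GAMMA
    gae_lambda=0.95,                         # GAE_LAMBDA
    clip_range=0.2,                          # CLIP_RANGE
    clip_range_vf=None,                      # CLIP_RANGE_VF
    ent_coef=0.0,                            # ENT_COEF
    vf_coef=0.5,                             # VF_COEF
    max_grad_norm=0.5,                       # MAX_GRAD_NORM
    target_kl=None,                          # TARGET_KL
    policy_kwargs=dict(net_arch=[64] * 4),   # 4 hidden layers, 64 neurons each
    device="cpu",                            # DEVICE
)
model.learn(total_timesteps=150_000)         # NUM_TRAINING_TIMESTEPS
```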
@@ -0,0 +1,11 @@
# Installs the configuration in the metastore
install:
	python config_v_001.py --install

# Uninstalls the configuration from the metastore
uninstall:
	python config_v_001.py --uninstall

# Cleans all configuration files
clean_config:
	rm -rf ./config.json
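The Makefile assumes a config_v_001.py in the same directory that accepts --install and --uninstall flags. That script is not shown in this diff; a minimal sketch of the command-line interface it implies, assuming a plain argparse setup, might look like:

```python
# Hypothetical sketch: config_v_001.py is not included in this diff; this only illustrates
# the --install/--uninstall interface that the Makefile targets above expect.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Manage the simulation configuration in the metastore")
    parser.add_argument("--install", action="store_true", help="install the configuration")
    parser.add_argument("--uninstall", action="store_true", help="uninstall the configuration")
    args = parser.parse_args()
    if args.install:
        print("Installing configuration ...")    # placeholder for the actual install logic
    elif args.uninstall:
        print("Uninstalling configuration ...")  # placeholder for the actual uninstall logic
```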
@@ -0,0 +1,26 @@
# CyBORG, CAGE Challenge 2

## Overview
<p align="center">
<img src="env.png" width="600">
</p>

## Useful commands

```bash
make install       # installs the environment in the metastore
make uninstall     # uninstalls the environment from the metastore
make clean_config  # cleans the materialized config file
```

## Author & Maintainer

Kim Hammar <[email protected]>

## Copyright and license

[LICENSE](../../../../../LICENSE.md)

Creative Commons

(C) 2020-2024, Kim Hammar