diff --git a/examples/manual_play/cyborg_action_space.py b/examples/manual_play/cyborg_action_space.py
index 12e09fe0b..bdb79afb3 100644
--- a/examples/manual_play/cyborg_action_space.py
+++ b/examples/manual_play/cyborg_action_space.py
@@ -11,7 +11,7 @@
         maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
         scanned_state=True, decoy_optimization=False)
     csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    for k, v in csle_cyborg_env.action_id_to_type_and_host.items():
+    for k, v in csle_cyborg_env.cyborg_action_id_to_type_and_host.items():
         action_id = k
         type, host = v
         print(f"{action_id}, {BlueAgentActionType(type).name}, {host}")
diff --git a/examples/manual_play/cyborg_parallel_policy_evaluation.py b/examples/manual_play/cyborg_parallel_policy_evaluation.py
deleted file mode 100644
index 8b5b09f88..000000000
--- a/examples/manual_play/cyborg_parallel_policy_evaluation.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-
-if __name__ == '__main__':
-    # ppo_policy = MetastoreFacade.get_ppo_policy(id=5)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False, cache_visited_states=True)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    # o, _ = csle_cyborg_env.reset()
-    # print(ppo_policy.probability(o=o, a=4))
-    # import torch
-    # actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
-    # dist = ppo_policy.model.policy.get_distribution(obs=torch.tensor([o]).to(ppo_policy.model.device)).log_prob(torch.tensor(actions).to(ppo_policy.model.device)).cpu().detach().numpy()
-    # import math
-    # dist = list(map(lambda x: math.exp(x), dist))
-    # print(dist)
-    # print(max(dist))
-    # print(actions[np.argmax(dist)])
-    # print(csle_cyborg_env.action_id_to_type_and_host[actions[np.argmax(dist)]])
-    # num_evaluations = 10000
-
-    # max_horizon = 25
-    # returns = []
-    # print("Starting policy evaluation")
-    # import time
-    #
-    # start = time.time()
-    # # print(list(csle_cyborg_env.visited_cyborg_states.keys()))
-    # avg_return = csle_cyborg_env.parallel_rollout(policy_id=5, num_processes=8, num_evals_per_process=13,
-    #                                               max_horizon=25, state_id=21474836480)
-    # print(avg_return)
-    # print(time.time() - start)
-    history_visit_count = 10
-    c=20
-    for action_visit_count in range(1, 100):
-        print(np.sqrt(np.log(history_visit_count) / action_visit_count)*c)
diff --git a/examples/manual_play/cyborg_rollout_test.py b/examples/manual_play/cyborg_rollout_test.py
deleted file mode 100644
index 42a8c1e9d..000000000
--- a/examples/manual_play/cyborg_rollout_test.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from csle_common.dao.training.ppo_policy import PPOPolicy
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-
-
-def rollout(env: CyborgScenarioTwoDefender, policy: PPOPolicy, time_horizon: int, samples: int, first_a: int) -> float:
-    """
-    Performs rollout
-
-    :param env: the cyborg environment
-    :param policy: the base policy
-    :param time_horizon: the time horizon
-    :param samples: the number of samples
-    :param first_a: the first action
-    :return: the average return
-    """
-    returns = []
-    for i in range(samples):
-        done = False
-        o, _ = env.reset()
-        R = 0
-        t = 0
-        while not done and t < time_horizon:
-            if t == 0:
-                a = first_a
-            else:
-                a = policy.action(o=o)
-            o, r, done, _, info = env.step(a)
-            R += r
-            t += 1
-        returns.append(R + policy.value(o))
-    return float(np.mean(returns))
-
-
-if __name__ == '__main__':
-    ppo_policy = MetastoreFacade.get_ppo_policy(id=18)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    time_horizon = 25
-    samples = 500
-    returns = []
-    best_action = None
-    best_val = -100
-    for k, v in csle_cyborg_env.action_id_to_type_and_host.items():
-        action_id = k
-        type, host = v
-        avg_return = rollout(env=csle_cyborg_env, policy=ppo_policy, time_horizon=time_horizon, samples=samples,
-                             first_a=action_id)
-        returns.append(avg_return)
-        if avg_return > best_val:
-            best_val = avg_return
-            best_action = f"{BlueAgentActionType(type).name}, {host}"
-        print(
-            f"action: {BlueAgentActionType(type).name}, {host}, avg_return: {avg_return}, best_action: {best_action}, "
-            f"best_val: {best_val}")
diff --git a/examples/manual_play/cyborg_rollout_three.py b/examples/manual_play/cyborg_rollout_three.py
deleted file mode 100644
index 3fda5808f..000000000
--- a/examples/manual_play/cyborg_rollout_three.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-import csle_agents.constants.constants as constants
-from csle_agents.agents.pomcp.pomcp_util import POMCPUtil
-import math
-
-if __name__ == '__main__':
-    ppo_policy = MetastoreFacade.get_ppo_policy(id=15)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False, cache_visited_states=True)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    #324519791598466012163466353442816
-    # POMCPUtil.trajectory_simulation_particles(o=324519791598466012163474943377408,
-    #                                           env=csle_cyborg_env, action_sequence=[31, 34, 28], num_particles=100)
-    from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil
-    vec = CyborgEnvUtil.state_id_to_state_vector(state_id=14507109835375640432425280, observation=False)
-    print(vec)
-    vec = CyborgEnvUtil.state_id_to_state_vector(state_id=16018267109893926900808000, observation=False)
-    print(vec)
-
-    print(csle_cyborg_env.cyborg_hostnames)
-    #324519791598466012163474943377408
-    # 324518553658426726783181790642176
-
-
-    # o, _ = csle_cyborg_env.reset()
-    # print(ppo_policy.probability(o=o, a=4))
-    # import torch
-    # actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
-    # dist = ppo_policy.model.policy.get_distribution(obs=torch.tensor([o]).to(ppo_policy.model.device)).log_prob(torch.tensor(actions).to(ppo_policy.model.device)).cpu().detach().numpy()
-    # import math
-    # dist = list(map(lambda x: math.exp(x), dist))
-    # print(dist)
-    # print(max(dist))
-    # print(actions[np.argmax(dist)])
-    # print(csle_cyborg_env.action_id_to_type_and_host[actions[np.argmax(dist)]])
-    # num_evaluations = 10000
-
-    # max_horizon = 25
-    # returns = []
-    # print("Starting policy evaluation")
-    # import time
-    #
-    # start = time.time()
-    # # print(list(csle_cyborg_env.visited_cyborg_states.keys()))
-    # avg_return = csle_cyborg_env.parallel_rollout(policy_id=5, num_processes=8, num_evals_per_process=13,
-    #                                               max_horizon=25, state_id=21474836480)
-    # print(avg_return)
-    # print(time.time() - start)
-    # history_visit_count = 10
-    # c=20
-    # for action_visit_count in range(1, 100):
-    #     print(np.sqrt(np.log(history_visit_count) / action_visit_count)*c)
diff --git a/examples/manual_play/cyborg_rollout_two.py b/examples/manual_play/cyborg_rollout_two.py
index 5b6344fff..1ad3284a5 100644
--- a/examples/manual_play/cyborg_rollout_two.py
+++ b/examples/manual_play/cyborg_rollout_two.py
@@ -17,6 +17,7 @@
     actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
     # for i in range(25):
     import torch
+    torch.multiprocessing.set_start_method('spawn')

     action_sequence = []
     returns = []
@@ -39,8 +40,8 @@
             R = 0
             for fictitious_state, prob in belief.items():
                 r = csle_cyborg_env.parallel_rollout(policy_id=15, num_processes=1, num_evals_per_process=1,
-                                                 max_horizon=1, state_id=fictitious_state)
-                R += r*prob
+                                                     max_horizon=1, state_id=fictitious_state)
+                R += r * prob
             action_values.append(R)
         print(action_values)
         a_idx = np.argmax(action_values)
@@ -55,4 +56,4 @@
             o=o_id, env=csle_cyborg_env, action_sequence=action_sequence, num_particles=10, verbose=True)
         belief = POMCPUtil.convert_samples_to_distribution(particles)
         returns.append(total_R)
-    print(f"average return: {np.mean(returns)}")
\ No newline at end of file
+    print(f"average return: {np.mean(returns)}")
diff --git a/examples/manual_play/learn_model.py b/examples/manual_play/learn_model.py
index 952cc5b52..e013796a3 100644
--- a/examples/manual_play/learn_model.py
+++ b/examples/manual_play/learn_model.py
@@ -1,4 +1,3 @@
-import numpy as np
 import io
 from csle_common.metastore.metastore_facade import MetastoreFacade
 from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
@@ -6,8 +5,6 @@
 from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
 import csle_agents.constants.constants as constants
 import json
-from csle_agents.agents.pomcp.pomcp_util import POMCPUtil
-import math

 if __name__ == '__main__':
     ppo_policy = MetastoreFacade.get_ppo_policy(id=22)
@@ -42,13 +39,15 @@
                 transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = 1
                 new_transitions += 1
             else:
-                transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = transition_probabilities[",".join([str(s), str(s_prime), str(a)])] + 1
+                transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = transition_probabilities[",".join(
+                    [str(s), str(s_prime), str(a)])] + 1
             if ",".join([str(s), str(s_prime), str(a)]) not in reward_function:
                 reward_function[",".join([str(s), str(s_prime), str(a)])] = r
             if ",".join([str(s_prime), str(oid)]) not in observation_probabilities:
                 observation_probabilities[",".join([str(s_prime), str(oid)])] = 1
             else:
-                observation_probabilities[",".join([str(s_prime), str(oid)])] = observation_probabilities[",".join([str(s_prime), str(oid)])] + 1
+                observation_probabilities[",".join([str(s_prime), str(oid)])] = observation_probabilities[",".join(
+                    [str(s_prime), str(oid)])] + 1
             t_count += 1

         print(f"new transitions: {new_transitions}")
@@ -63,5 +62,3 @@
         json_str = json.dumps(model, indent=4, sort_keys=True)
         with io.open(f"/home/kim/cyborg_model_{i}.json", 'w', encoding='utf-8') as f:
             f.write(json_str)
-
-
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
index 171ab3e16..c277b8c1f 100644
--- a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
@@ -73,7 +73,7 @@
                 descr="maximum number of negative samples when filling belief particles"),
             agents_constants.POMCP.PARALLEL_ROLLOUT: HParam(
                 value=False, name=agents_constants.POMCP.PARALLEL_ROLLOUT, descr="boolean flag indicating whether "
-                                                                                 "parallel rollout should be used"),
+                      "parallel rollout should be used"),
             agents_constants.POMCP.NUM_PARALLEL_PROCESSES: HParam(
                 value=5, name=agents_constants.POMCP.NUM_PARALLEL_PROCESSES, descr="number of parallel processes"),
             agents_constants.POMCP.NUM_EVALS_PER_PROCESS: HParam(
@@ -99,6 +99,7 @@
         player_type=PlayerType.DEFENDER, player_idx=0
     )
    import torch
+    torch.multiprocessing.set_start_method('spawn')

    agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                       experiment_config=experiment_config, save_to_metastore=False)
diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
index 4804130da..1d1ef480c 100644
--- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
+++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
@@ -20,7 +20,7 @@
         raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}ppo_test",
-        title="Cardiff PPO Cyborg BLine", random_seeds=[399], agent_type=AgentType.PPO,
+        title="Cardiff PPO Cyborg Meander", random_seeds=[399], agent_type=AgentType.PPO,
         log_every=1,
         hparams={
             constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
index 82a79f836..5fe01e3cd 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
@@ -93,7 +93,7 @@ def ucb_acquisition_function(action: "Node", c: float, rollout_policy: Union[Pol
         if action.visit_count == 0:
             return np.inf
         else:
-            return action.value + (prior_weight*prior_weight)/action.visit_count
+            return action.value + (prior_weight * prior_weight) / action.visit_count
         # prior = 1.0
         # if rollout_policy is not None:
         #     prior = rollout_policy.probability(o=o, a=action.action)
diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
index 6ff710811..38723073d 100644
--- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
+++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
@@ -14,7 +14,6 @@
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from csle_common.metastore.metastore_facade import MetastoreFacade
-from csle_common.logging.log import Logger
 import gym_csle_cyborg.constants.constants as env_constants
 from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
 from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
index 53c65ed1a..032cc1d45 100644
--- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
+++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
@@ -56,9 +56,8 @@ def update_red_agent(config: CSLECyborgConfig, current_red_agent: RedAgentType,

     @staticmethod
     def setup_cyborg_env(config: CSLECyborgConfig) \
-            -> Tuple[str, ChallengeWrapper, List[str], Dict[str, int], List[str], Dict[str, int],
-                     Dict[int, Tuple[BlueAgentActionType, str]],
-                     Dict[Tuple[BlueAgentActionType, str], int], RedAgentType]:
+            -> Tuple[str, ChallengeWrapper, List[str], Dict[str, int], List[str], Dict[str, int], Dict[
+                int, Tuple[BlueAgentActionType, str]], Dict[Tuple[BlueAgentActionType, str], int], RedAgentType]:
         """
         Sets up the cyborg environment and associated metadata

@@ -346,11 +345,9 @@ def state_to_vector(state: List[List[Any]], decoy_state: List[List[BlueAgentActi
                 host_access = 3
             host_decoy_state = len(decoy_state[host_id])
             if not observation:
-                state_vector.append([host_access])
-                # state_vector.append([host_known, host_scanned, host_access, host_decoy_state])
+                state_vector.append([host_known, host_scanned, host_access, host_decoy_state])
             else:
-                state_vector.append([activity, host_access])
-                # state_vector.append([activity, host_scanned, host_access, host_decoy_state])
+                state_vector.append([activity, host_scanned, host_access, host_decoy_state])
         return state_vector

     @staticmethod
@@ -370,10 +367,10 @@ def state_vector_to_state_id(state_vector: List[List[int]], observation: bool =
                 if not observation:
                     if i == 0:
                         host_binary_id_str += format(elem, '02b')
-                    # if i == 0:
-                    #     host_binary_id_str += format(elem, '01b')
-                    # if i == 1:
-                    #     host_binary_id_str += format(elem, '01b')
+                    if i == 0:
+                        host_binary_id_str += format(elem, '01b')
+                    if i == 1:
+                        host_binary_id_str += format(elem, '01b')
                 else:
                     if i == 0:
                         host_binary_id_str += format(elem, '02b')
@@ -398,33 +395,25 @@ def state_id_to_state_vector(state_id: int, observation: bool = False) -> List[L
         :return: the state vector
         """
         if not observation:
-            # binary_id_str = format(state_id, "091b")
-            binary_id_str = format(state_id, "026b")
-            host_binary_ids_str = [binary_id_str[i:i + 2] for i in range(0, len(binary_id_str), 2)]
+            binary_id_str = format(state_id, "091b")
+            host_binary_ids_str = [binary_id_str[i:i + 7] for i in range(0, len(binary_id_str), 7)]
         else:
-            # binary_id_str = format(state_id, "0117b")
-            binary_id_str = format(state_id, "052b")
-            # host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)]
-            host_binary_ids_str = [binary_id_str[i:i + 4] for i in range(0, len(binary_id_str), 4)]
+            binary_id_str = format(state_id, "0117b")
+            host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)]
         state_vector = []
         for host_bin in host_binary_ids_str:
             if not observation:
-                access = int(host_bin[0:2], 2)
-                # known = int(host_bin[0:1], 2)
-                # scanned = int(host_bin[1:2], 2)
-                # access = int(host_bin[2:4], 2)
-                # decoy = int(host_bin[4:7], 2)
-                host_vector = [access]
-                # host_vector = [known, scanned, access, decoy]
+                known = int(host_bin[0:1], 2)
+                scanned = int(host_bin[1:2], 2)
+                access = int(host_bin[2:4], 2)
+                decoy = int(host_bin[4:7], 2)
+                host_vector = [known, scanned, access, decoy]
             else:
                 activity = int(host_bin[0:2], 2)
-                access = int(host_bin[2:4], 2)
-                # activity = int(host_bin[0:2], 2)
-                # scanned = int(host_bin[2:4], 2)
-                # access = int(host_bin[4:6], 2)
-                # decoy = int(host_bin[6:9], 2)
-                host_vector = [activity, access]
-                # host_vector = [activity, scanned, access, decoy]
+                scanned = int(host_bin[2:4], 2)
+                access = int(host_bin[4:6], 2)
+                decoy = int(host_bin[6:9], 2)
+                host_vector = [activity, scanned, access, decoy]
             state_vector.append(host_vector)
         return state_vector