diff --git a/examples/manual_play/cyborg_action_space.py b/examples/manual_play/cyborg_action_space.py
index 12e09fe0b..bdb79afb3 100644
--- a/examples/manual_play/cyborg_action_space.py
+++ b/examples/manual_play/cyborg_action_space.py
@@ -11,7 +11,7 @@
         maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
         scanned_state=True, decoy_optimization=False)
     csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    for k, v in csle_cyborg_env.action_id_to_type_and_host.items():
+    for k, v in csle_cyborg_env.cyborg_action_id_to_type_and_host.items():
         action_id = k
         type, host = v
         print(f"{action_id}, {BlueAgentActionType(type).name}, {host}")
diff --git a/examples/manual_play/cyborg_parallel_policy_evaluation.py b/examples/manual_play/cyborg_parallel_policy_evaluation.py
deleted file mode 100644
index 8b5b09f88..000000000
--- a/examples/manual_play/cyborg_parallel_policy_evaluation.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-
-if __name__ == '__main__':
-    # ppo_policy = MetastoreFacade.get_ppo_policy(id=5)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False, cache_visited_states=True)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    # o, _ = csle_cyborg_env.reset()
-    # print(ppo_policy.probability(o=o, a=4))
-    # import torch
-    # actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
-    # dist = ppo_policy.model.policy.get_distribution(obs=torch.tensor([o]).to(ppo_policy.model.device)).log_prob(torch.tensor(actions).to(ppo_policy.model.device)).cpu().detach().numpy()
-    # import math
-    # dist = list(map(lambda x: math.exp(x), dist))
-    # print(dist)
-    # print(max(dist))
-    # print(actions[np.argmax(dist)])
-    # print(csle_cyborg_env.action_id_to_type_and_host[actions[np.argmax(dist)]])
-    # num_evaluations = 10000
-
-    # max_horizon = 25
-    # returns = []
-    # print("Starting policy evaluation")
-    # import time
-    #
-    # start = time.time()
-    # # print(list(csle_cyborg_env.visited_cyborg_states.keys()))
-    # avg_return = csle_cyborg_env.parallel_rollout(policy_id=5, num_processes=8, num_evals_per_process=13,
-    #                                               max_horizon=25, state_id=21474836480)
-    # print(avg_return)
-    # print(time.time() - start)
-    history_visit_count = 10
-    c=20
-    for action_visit_count in range(1, 100):
-        print(np.sqrt(np.log(history_visit_count) / action_visit_count)*c)
diff --git a/examples/manual_play/cyborg_rollout_test.py b/examples/manual_play/cyborg_rollout_test.py
deleted file mode 100644
index 42a8c1e9d..000000000
--- a/examples/manual_play/cyborg_rollout_test.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from csle_common.dao.training.ppo_policy import PPOPolicy
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-
-
-def rollout(env: CyborgScenarioTwoDefender, policy: PPOPolicy, time_horizon: int, samples: int, first_a: int) -> float:
-    """
-    Performs rollout
-
-    :param env: the cyborg environment
-    :param policy: the base policy
-    :param time_horizon: the time horizon
-    :param samples: the number of samples
-    :param first_a: the first action
-    :return: the average return
-    """
-    returns = []
-    for i in range(samples):
-        done = False
-        o, _ = env.reset()
-        R = 0
-        t = 0
-        while not done and t < time_horizon:
-            if t == 0:
-                a = first_a
-            else:
-                a = policy.action(o=o)
-            o, r, done, _, info = env.step(a)
-            R += r
-            t += 1
-        returns.append(R + policy.value(o))
-    return float(np.mean(returns))
-
-
-if __name__ == '__main__':
-    ppo_policy = MetastoreFacade.get_ppo_policy(id=18)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    time_horizon = 25
-    samples = 500
-    returns = []
-    best_action = None
-    best_val = -100
-    for k, v in csle_cyborg_env.action_id_to_type_and_host.items():
-        action_id = k
-        type, host = v
-        avg_return = rollout(env=csle_cyborg_env, policy=ppo_policy, time_horizon=time_horizon, samples=samples,
-                             first_a=action_id)
-        returns.append(avg_return)
-        if avg_return > best_val:
-            best_val = avg_return
-            best_action = f"{BlueAgentActionType(type).name}, {host}"
-        print(
-            f"action: {BlueAgentActionType(type).name}, {host}, avg_return: {avg_return}, best_action: {best_action}, "
-            f"best_val: {best_val}")
diff --git a/examples/manual_play/cyborg_rollout_three.py b/examples/manual_play/cyborg_rollout_three.py
deleted file mode 100644
index 3fda5808f..000000000
--- a/examples/manual_play/cyborg_rollout_three.py
+++ /dev/null
@@ -1,58 +0,0 @@
-import numpy as np
-from csle_common.metastore.metastore_facade import MetastoreFacade
-from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
-from gym_csle_cyborg.dao.red_agent_type import RedAgentType
-from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
-import csle_agents.constants.constants as constants
-from csle_agents.agents.pomcp.pomcp_util import POMCPUtil
-import math
-
-if __name__ == '__main__':
-    ppo_policy = MetastoreFacade.get_ppo_policy(id=15)
-    config = CSLECyborgConfig(
-        gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
-        maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, decoy_state=True,
-        scanned_state=True, decoy_optimization=False, cache_visited_states=True)
-    csle_cyborg_env = CyborgScenarioTwoDefender(config=config)
-    #324519791598466012163466353442816
-    # POMCPUtil.trajectory_simulation_particles(o=324519791598466012163474943377408,
-    #                                           env=csle_cyborg_env, action_sequence=[31, 34, 28], num_particles=100)
-    from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil
-    vec = CyborgEnvUtil.state_id_to_state_vector(state_id=14507109835375640432425280, observation=False)
-    print(vec)
-    vec = CyborgEnvUtil.state_id_to_state_vector(state_id=16018267109893926900808000, observation=False)
-    print(vec)
-
-    print(csle_cyborg_env.cyborg_hostnames)
-    #324519791598466012163474943377408
-    # 324518553658426726783181790642176
-
-
-    # o, _ = csle_cyborg_env.reset()
-    # print(ppo_policy.probability(o=o, a=4))
-    # import torch
-    # actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
-    # dist = ppo_policy.model.policy.get_distribution(obs=torch.tensor([o]).to(ppo_policy.model.device)).log_prob(torch.tensor(actions).to(ppo_policy.model.device)).cpu().detach().numpy()
-    # import math
-    # dist = list(map(lambda x: math.exp(x), dist))
-    # print(dist)
-    # print(max(dist))
-    # print(actions[np.argmax(dist)])
-    # print(csle_cyborg_env.action_id_to_type_and_host[actions[np.argmax(dist)]])
-    # num_evaluations = 10000
-
-    # max_horizon = 25
-    # returns = []
-    # print("Starting policy evaluation")
-    # import time
-    #
-    # start = time.time()
-    # # print(list(csle_cyborg_env.visited_cyborg_states.keys()))
-    # avg_return = csle_cyborg_env.parallel_rollout(policy_id=5, num_processes=8, num_evals_per_process=13,
-    #                                               max_horizon=25, state_id=21474836480)
-    # print(avg_return)
-    # print(time.time() - start)
-    # history_visit_count = 10
-    # c=20
-    # for action_visit_count in range(1, 100):
-    #     print(np.sqrt(np.log(history_visit_count) / action_visit_count)*c)
diff --git a/examples/manual_play/cyborg_rollout_two.py b/examples/manual_play/cyborg_rollout_two.py
index 5b6344fff..1ad3284a5 100644
--- a/examples/manual_play/cyborg_rollout_two.py
+++ b/examples/manual_play/cyborg_rollout_two.py
@@ -17,6 +17,7 @@
     actions = list(csle_cyborg_env.action_id_to_type_and_host.keys())
     # for i in range(25):
     import torch
+    torch.multiprocessing.set_start_method('spawn')

     action_sequence = []
     returns = []
@@ -39,8 +40,8 @@
             R = 0
             for fictitious_state, prob in belief.items():
                 r = csle_cyborg_env.parallel_rollout(policy_id=15, num_processes=1, num_evals_per_process=1,
-                                                 max_horizon=1, state_id=fictitious_state)
-                R += r*prob
+                                                     max_horizon=1, state_id=fictitious_state)
+                R += r * prob
             action_values.append(R)
         print(action_values)
         a_idx = np.argmax(action_values)
@@ -55,4 +56,4 @@
             o=o_id, env=csle_cyborg_env, action_sequence=action_sequence, num_particles=10, verbose=True)
         belief = POMCPUtil.convert_samples_to_distribution(particles)
         returns.append(total_R)
-    print(f"average return: {np.mean(returns)}")
\ No newline at end of file
+    print(f"average return: {np.mean(returns)}")
diff --git a/examples/manual_play/learn_model.py b/examples/manual_play/learn_model.py
index 952cc5b52..e013796a3 100644
--- a/examples/manual_play/learn_model.py
+++ b/examples/manual_play/learn_model.py
@@ -1,4 +1,3 @@
-import numpy as np
 import io
 from csle_common.metastore.metastore_facade import MetastoreFacade
 from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
@@ -6,8 +5,6 @@
 from gym_csle_cyborg.envs.cyborg_scenario_two_defender import CyborgScenarioTwoDefender
 import csle_agents.constants.constants as constants
 import json
-from csle_agents.agents.pomcp.pomcp_util import POMCPUtil
-import math

 if __name__ == '__main__':
     ppo_policy = MetastoreFacade.get_ppo_policy(id=22)
@@ -42,13 +39,15 @@
                 transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = 1
                 new_transitions += 1
             else:
-                transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = transition_probabilities[",".join([str(s), str(s_prime), str(a)])] + 1
+                transition_probabilities[",".join([str(s), str(s_prime), str(a)])] = transition_probabilities[",".join(
+                    [str(s), str(s_prime), str(a)])] + 1
             if ",".join([str(s), str(s_prime), str(a)]) not in reward_function:
                 reward_function[",".join([str(s), str(s_prime), str(a)])] = r
             if ",".join([str(s_prime), str(oid)]) not in observation_probabilities:
                 observation_probabilities[",".join([str(s_prime), str(oid)])] = 1
             else:
-                observation_probabilities[",".join([str(s_prime), str(oid)])] = observation_probabilities[",".join([str(s_prime), str(oid)])] + 1
+                observation_probabilities[",".join([str(s_prime), str(oid)])] = observation_probabilities[",".join(
+                    [str(s_prime), str(oid)])] + 1
             t_count += 1

         print(f"new transitions: {new_transitions}")
@@ -63,5 +62,3 @@
         json_str = json.dumps(model, indent=4, sort_keys=True)
         with io.open(f"/home/kim/cyborg_model_{i}.json", 'w', encoding='utf-8') as f:
             f.write(json_str)
-
-
diff --git a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
index 171ab3e16..c277b8c1f 100644
--- a/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
+++ b/examples/training/pomcp/cyborg_scenario_two_defender/run_cyborg_version_two.py
@@ -73,7 +73,7 @@
                 descr="maximum number of negative samples when filling belief particles"),
             agents_constants.POMCP.PARALLEL_ROLLOUT: HParam(
                 value=False, name=agents_constants.POMCP.PARALLEL_ROLLOUT, descr="boolean flag indicating whether "
-                                                                                 "parallel rollout should be used"),
+                      "parallel rollout should be used"),
             agents_constants.POMCP.NUM_PARALLEL_PROCESSES: HParam(
                 value=5, name=agents_constants.POMCP.NUM_PARALLEL_PROCESSES, descr="number of parallel processes"),
             agents_constants.POMCP.NUM_EVALS_PER_PROCESS: HParam(
@@ -99,6 +99,7 @@
         player_type=PlayerType.DEFENDER, player_idx=0
     )
    import torch
+    torch.multiprocessing.set_start_method('spawn')

    agent = POMCPAgent(emulation_env_config=emulation_env_config, simulation_env_config=simulation_env_config,
                       experiment_config=experiment_config, save_to_metastore=False)
diff --git a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
index 4804130da..1d1ef480c 100644
--- a/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
+++ b/examples/training/ppo/cyborg_scenario_two_defender/run_cyborg_scenario_two_defender_meander_cardiff_decoy_state.py
@@ -20,7 +20,7 @@
         raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}ppo_test",
-        title="Cardiff PPO Cyborg BLine", random_seeds=[399], agent_type=AgentType.PPO,
+        title="Cardiff PPO Cyborg Meander", random_seeds=[399], agent_type=AgentType.PPO,
         log_every=1,
         hparams={
             constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
index 82a79f836..5fe01e3cd 100644
--- a/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/pomcp/pomcp_util.py
@@ -93,7 +93,7 @@ def ucb_acquisition_function(action: "Node", c: float, rollout_policy: Union[Pol
         if action.visit_count == 0:
             return np.inf
         else:
-            return action.value + (prior_weight*prior_weight)/action.visit_count
+            return action.value + (prior_weight * prior_weight) / action.visit_count
         # prior = 1.0
         # if rollout_policy is not None:
         #     prior = rollout_policy.probability(o=o, a=action.action)
diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
index 6ff710811..38723073d 100644
--- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
+++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/envs/cyborg_scenario_two_defender.py
@@ -14,7 +14,6 @@
 from csle_common.dao.simulation_config.base_env import BaseEnv
 from csle_common.dao.simulation_config.simulation_trace import SimulationTrace
 from csle_common.metastore.metastore_facade import MetastoreFacade
-from csle_common.logging.log import Logger
 import gym_csle_cyborg.constants.constants as env_constants
 from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
 from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
diff --git a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
index 53c65ed1a..032cc1d45 100644
--- a/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
+++ b/simulation-system/libs/gym-csle-cyborg/src/gym_csle_cyborg/util/cyborg_env_util.py
@@ -56,9 +56,8 @@ def update_red_agent(config: CSLECyborgConfig, current_red_agent: RedAgentType,

     @staticmethod
     def setup_cyborg_env(config: CSLECyborgConfig) \
-            -> Tuple[str, ChallengeWrapper, List[str], Dict[str, int], List[str], Dict[str, int],
-                     Dict[int, Tuple[BlueAgentActionType, str]],
-                     Dict[Tuple[BlueAgentActionType, str], int], RedAgentType]:
+            -> Tuple[str, ChallengeWrapper, List[str], Dict[str, int], List[str], Dict[str, int], Dict[
+                int, Tuple[BlueAgentActionType, str]], Dict[Tuple[BlueAgentActionType, str], int], RedAgentType]:
         """
         Sets up the cyborg environment and associated metadata

@@ -346,11 +345,9 @@ def state_to_vector(state: List[List[Any]], decoy_state: List[List[BlueAgentActi
                 host_access = 3
             host_decoy_state = len(decoy_state[host_id])
             if not observation:
-                state_vector.append([host_access])
-                # state_vector.append([host_known, host_scanned, host_access, host_decoy_state])
+                state_vector.append([host_known, host_scanned, host_access, host_decoy_state])
             else:
-                state_vector.append([activity, host_access])
-                # state_vector.append([activity, host_scanned, host_access, host_decoy_state])
+                state_vector.append([activity, host_scanned, host_access, host_decoy_state])
         return state_vector

     @staticmethod
@@ -370,10 +367,10 @@ def state_vector_to_state_id(state_vector: List[List[int]], observation: bool =
                 if not observation:
                     if i == 0:
                         host_binary_id_str += format(elem, '02b')
-                    # if i == 0:
-                    #     host_binary_id_str += format(elem, '01b')
-                    # if i == 1:
-                    #     host_binary_id_str += format(elem, '01b')
+                    if i == 0:
+                        host_binary_id_str += format(elem, '01b')
+                    if i == 1:
+                        host_binary_id_str += format(elem, '01b')
                 else:
                     if i == 0:
                         host_binary_id_str += format(elem, '02b')
@@ -398,33 +395,25 @@ def state_id_to_state_vector(state_id: int, observation: bool = False) -> List[L
         :return: the state vector
         """
         if not observation:
-            # binary_id_str = format(state_id, "091b")
-            binary_id_str = format(state_id, "026b")
-            host_binary_ids_str = [binary_id_str[i:i + 2] for i in range(0, len(binary_id_str), 2)]
+            binary_id_str = format(state_id, "091b")
+            host_binary_ids_str = [binary_id_str[i:i + 7] for i in range(0, len(binary_id_str), 7)]
         else:
-            # binary_id_str = format(state_id, "0117b")
-            binary_id_str = format(state_id, "052b")
-            # host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)]
-            host_binary_ids_str = [binary_id_str[i:i + 4] for i in range(0, len(binary_id_str), 4)]
+            binary_id_str = format(state_id, "0117b")
+            host_binary_ids_str = [binary_id_str[i:i + 9] for i in range(0, len(binary_id_str), 9)]
         state_vector = []
         for host_bin in host_binary_ids_str:
             if not observation:
-                access = int(host_bin[0:2], 2)
-                # known = int(host_bin[0:1], 2)
-                # scanned = int(host_bin[1:2], 2)
-                # access = int(host_bin[2:4], 2)
-                # decoy = int(host_bin[4:7], 2)
-                host_vector = [access]
-                # host_vector = [known, scanned, access, decoy]
+                known = int(host_bin[0:1], 2)
+                scanned = int(host_bin[1:2], 2)
+                access = int(host_bin[2:4], 2)
+                decoy = int(host_bin[4:7], 2)
+                host_vector = [known, scanned, access, decoy]
             else:
                 activity = int(host_bin[0:2], 2)
-                access = int(host_bin[2:4], 2)
-                # activity = int(host_bin[0:2], 2)
-                # scanned = int(host_bin[2:4], 2)
-                # access = int(host_bin[4:6], 2)
-                # decoy = int(host_bin[6:9], 2)
-                host_vector = [activity, access]
-                # host_vector = [activity, scanned, access, decoy]
+                scanned = int(host_bin[2:4], 2)
+                access = int(host_bin[4:6], 2)
+                decoy = int(host_bin[6:9], 2)
+                host_vector = [activity, scanned, access, decoy]
             state_vector.append(host_vector)
         return state_vector