diff --git a/RL_Framework/__pycache__/cnn_policy.cpython-36.pyc b/RL_Framework/__pycache__/cnn_policy.cpython-36.pyc
index 9667181b..fb0eba1e 100644
Binary files a/RL_Framework/__pycache__/cnn_policy.cpython-36.pyc and b/RL_Framework/__pycache__/cnn_policy.cpython-36.pyc differ
diff --git a/RL_Framework/gym_simalphagarden/simalphagarden/__pycache__/__init__.cpython-36.pyc b/RL_Framework/gym_simalphagarden/simalphagarden/__pycache__/__init__.cpython-36.pyc
index 24f2d021..f5b660cd 100644
Binary files a/RL_Framework/gym_simalphagarden/simalphagarden/__pycache__/__init__.cpython-36.pyc and b/RL_Framework/gym_simalphagarden/simalphagarden/__pycache__/__init__.cpython-36.pyc differ
diff --git a/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/__init__.cpython-36.pyc b/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/__init__.cpython-36.pyc
index db015d7b..3690edef 100644
Binary files a/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/__init__.cpython-36.pyc and b/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/__init__.cpython-36.pyc differ
diff --git a/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/simalphagarden_env.cpython-36.pyc b/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/simalphagarden_env.cpython-36.pyc
index 21971859..e6e0fa09 100644
Binary files a/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/simalphagarden_env.cpython-36.pyc and b/RL_Framework/gym_simalphagarden/simalphagarden/envs/__pycache__/simalphagarden_env.cpython-36.pyc differ
diff --git a/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/__init__.cpython-36.pyc b/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/__init__.cpython-36.pyc
index 7f7af1bb..128e570d 100644
Binary files a/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/__init__.cpython-36.pyc and b/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/__init__.cpython-36.pyc differ
diff --git a/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/wrapper_interface.cpython-36.pyc b/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/wrapper_interface.cpython-36.pyc
index e08bad8a..fa6d1045 100644
Binary files a/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/wrapper_interface.cpython-36.pyc and b/RL_Framework/gym_simalphagarden/wrapperenv/__pycache__/wrapper_interface.cpython-36.pyc differ
diff --git a/RL_Framework/pipeline.py b/RL_Framework/pipeline.py
index 4810c3e8..af2ceeca 100755
--- a/RL_Framework/pipeline.py
+++ b/RL_Framework/pipeline.py
@@ -184,7 +184,7 @@ def evaluate_policy(self, folder_path, num_evals, env, is_baseline=False, baseli
     pathlib.Path(folder_path + '/Returns').mkdir(parents=True, exist_ok=True)
     filename = folder_path + '/Returns' + '/predict_' + str(i) + '.json'
     f = open(filename, 'w')
-    f.write(json.dumps(e))
+    f.write(json.dumps(e, indent=4))
     f.close()
 
 def single_run(self, folder_path, num_evals, policy_kwargs=None, is_baseline=False, baseline_policy=None):
@@ -240,7 +240,7 @@ def single_run(self, folder_path, num_evals, policy_kwargs=None, is_baseline=Fal
     # Instantiate the agent
     # model = PPO2(CustomCnnPolicy, env, policy_kwargs=policy_kwargs, ent_coef=ent_coef, n_steps=n_steps, nminibatches=nminibatches, noptepochs=noptepochs, learning_rate=learning_rate, verbose=1, tensorboard_log=folder_path + '/ppo_v2_tensorboard/')
-    model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log=folder_path + '/ppo_v2_tensorboard/')
+    model = PPO2(MlpPolicy, env, ent_coef=ent_coef, n_steps=n_steps, nminibatches=nminibatches, noptepochs=noptepochs, learning_rate=learning_rate, verbose=1, tensorboard_log=folder_path + '/ppo_v2_tensorboard/')
     # Train the agent
     model.learn(total_timesteps=rl_time_steps) # this will crash explaining that the invalid value originated from the env
@@ -336,7 +336,7 @@ def batch_run(self, n, rl_config, garden_x, garden_y, num_plant_types, num_plant
 rl_config = [
     {
         'rl_algorithm': 'MLP',
-        'time_steps': 100000,
+        'time_steps': 7000000,
         'ent_coef': 0.0,
         'n_steps': 40000,
         'nminibatches': 4,
@@ -360,8 +360,8 @@ def batch_run(self, n, rl_config, garden_x, garden_y, num_plant_types, num_plant
         "NUM_CONVS": 1,
         "FILTER_SIZE": 1,
         "STRIDE": 1,
-        'CC_COEF': 0.6,
-        'WATER_COEF': 0.4
+        'CC_COEF': 10,
+        'WATER_COEF': 100
     }
 ]
 num_evals = 50
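Note on the pipeline.py hunks above: the previously commented-out hyperparameters are now threaded through to stable-baselines' PPO2 constructor, and rl_config raises time_steps from 100000 to 7000000. Below is a minimal standalone sketch of the same call, assuming stable-baselines 2.x; the CartPole-v1 environment and the noptepochs/learning_rate values are placeholders, since only ent_coef, n_steps, nminibatches, and time_steps are visible in this diff.

# Minimal sketch of the PPO2 call above, outside the pipeline class.
# CartPole-v1, noptepochs=4, and learning_rate=2.5e-4 are assumptions;
# ent_coef, n_steps, nminibatches, and total_timesteps come from rl_config.
import gym
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: gym.make('CartPole-v1')])

model = PPO2(
    MlpPolicy,
    env,
    ent_coef=0.0,          # no entropy bonus ('ent_coef' in rl_config)
    n_steps=40000,         # rollout length; n_steps * n_envs must be divisible by nminibatches
    nminibatches=4,        # 40000 / 4 = 10000 samples per minibatch
    noptepochs=4,          # assumed; value not visible in the diff
    learning_rate=2.5e-4,  # assumed; value not visible in the diff
    verbose=1,
    tensorboard_log='./ppo_v2_tensorboard/',
)
model.learn(total_timesteps=7000000)  # 'time_steps' in rl_config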
diff --git a/Simulator_v2/simulatorv2/SimAlphaGardenWrapper.py b/Simulator_v2/simulatorv2/SimAlphaGardenWrapper.py
index eba0d5a3..74742fde 100644
--- a/Simulator_v2/simulatorv2/SimAlphaGardenWrapper.py
+++ b/Simulator_v2/simulatorv2/SimAlphaGardenWrapper.py
@@ -40,7 +40,8 @@ def reward(self, state):
     entropy = -np.sum(prob*np.log(prob))
     water_coef = self.config.getfloat('cnn', 'water_coef')
     cc_coef = self.config.getfloat('cnn', 'cc_coef')
-    return (cc_coef * total_cc) + (0 * entropy) - (water_coef * np.sum(self.curr_action))
+    num_cells = self.N * self.M
+    return (cc_coef * total_cc) + (0 * entropy) + water_coef * np.sum(1 - self.curr_action / num_cells)
 
 '''
 Method called by the gym environment to execute an action.
diff --git a/Simulator_v2/simulatorv2/garden.py b/Simulator_v2/simulatorv2/garden.py
index b5a58ab6..a4aba15d 100644
--- a/Simulator_v2/simulatorv2/garden.py
+++ b/Simulator_v2/simulatorv2/garden.py
@@ -290,4 +290,4 @@ def show_animation(self):
     if self.animate:
         self.anim_show()
     else:
-        print("[Garden] No animation to show. Set animate=True when initializing to allow animating history of garden!")
\ No newline at end of file
+        print("[Garden] No animation to show. Set animate=True when initializing to allow animating history of garden!")
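Note on the reward() hunk above: the water term flips sign, from a penalty proportional to total water applied to a bonus for water withheld, normalized by the number of grid cells; together with the much larger CC_COEF/WATER_COEF values in rl_config this reshapes the trade-off between canopy cover and irrigation. Below is a standalone numpy sketch contrasting the two forms; the grid size, coefficients, canopy-cover value, and action values are placeholders, not values from the simulator.

# Contrast of the old and new water terms from the reward() hunk above.
# N, M, the coefficients, total_cc, and curr_action are all placeholders.
import numpy as np

N, M = 10, 10
cc_coef, water_coef = 10.0, 100.0   # CC_COEF / WATER_COEF from rl_config
total_cc = 42.0                     # placeholder total canopy cover
entropy = 0.0                       # zeroed out in both versions
curr_action = np.random.rand(N, M)  # per-cell irrigation amounts in [0, 1)

# Old form: penalize total water used.
old_reward = (cc_coef * total_cc) + (0 * entropy) - water_coef * np.sum(curr_action)

# New form: reward water withheld; each cell contributes 1 - action / (N*M),
# so the water term is largest (water_coef * N * M) when no water is applied
# and decreases linearly as total water used grows.
num_cells = N * M
new_reward = (cc_coef * total_cc) + (0 * entropy) + water_coef * np.sum(1 - curr_action / num_cells)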