diff --git a/benchmark.md b/benchmark.md index 38eb84f..7d32af1 100644 --- a/benchmark.md +++ b/benchmark.md @@ -85,7 +85,7 @@ |sac |HumanoidBulletEnv-v0 | 2048.187| 829.776| 149886| 172| |sac |InvertedDoublePendulumBulletEnv-v0 | 9357.406| 0.504| 150000| 150| |sac |InvertedPendulumSwingupBulletEnv-v0| 891.508| 0.963| 150000| 150| -|sac |LunarLanderContinuous-v2 | 194.191| 100.631| 149699| 304| +|sac |LunarLanderContinuous-v2 | 269.783| 57.077| 149852| 709| |sac |Pendulum-v0 | -159.669| 86.665| 150000| 750| |sac |ReacherBulletEnv-v0 | 17.529| 9.860| 150000| 1000| |sac |Walker2DBulletEnv-v0 | 2052.646| 13.631| 150000| 150| diff --git a/enjoy.py b/enjoy.py index 5bf98f5..19b95f0 100644 --- a/enjoy.py +++ b/enjoy.py @@ -71,7 +71,10 @@ should_render=not args.no_render, hyperparams=hyperparams) -model = ALGOS[algo].load(model_path) +# ACER raises errors because the environment passed must have +# the same number of environments as the model was trained on. +load_env = None if algo == 'acer' else env +model = ALGOS[algo].load(model_path, env=load_env) obs = env.reset() diff --git a/hyperparams/ddpg.yml b/hyperparams/ddpg.yml index 7141210..87a8749 100644 --- a/hyperparams/ddpg.yml +++ b/hyperparams/ddpg.yml @@ -19,6 +19,7 @@ Pendulum-v0: noise_std: 0.1 memory_limit: 50000 +# To be tuned BipedalWalker-v2: n_timesteps: !!float 5e6 policy: 'LnMlpPolicy' @@ -27,6 +28,7 @@ BipedalWalker-v2: noise_std: 0.2 memory_limit: 50000 +# To be tuned Walker2DBulletEnv-v0: n_timesteps: !!float 2e6 policy: 'LnMlpPolicy' @@ -36,6 +38,7 @@ Walker2DBulletEnv-v0: batch_size: 64 normalize_observations: True +# To be tuned HalfCheetahBulletEnv-v0: n_timesteps: !!float 2e6 policy: 'LnMlpPolicy' diff --git a/hyperparams/ppo2.yml b/hyperparams/ppo2.yml index 349f3da..2de10c9 100644 --- a/hyperparams/ppo2.yml +++ b/hyperparams/ppo2.yml @@ -220,6 +220,7 @@ MinitaurBulletDuckEnv-v0: learning_rate: 2.5e-4 cliprange: 0.2 +# To be tuned HumanoidBulletEnv-v0: normalize: true n_envs: 8 diff --git a/hyperparams/sac.yml b/hyperparams/sac.yml index 1f8d2e1..e53b6b0 100644 --- a/hyperparams/sac.yml +++ b/hyperparams/sac.yml @@ -5,7 +5,7 @@ MountainCarContinuous-v0: learning_rate: lin_3e-4 buffer_size: 1000000 batch_size: 64 - ent_coef: 0.01 + ent_coef: 'auto' train_freq: 1 gradient_steps: 1 learning_starts: 10000 @@ -13,16 +13,12 @@ MountainCarContinuous-v0: Pendulum-v0: n_timesteps: !!float 60000 policy: 'MlpPolicy' - ent_coef: 0.2 learning_starts: 1000 LunarLanderContinuous-v2: - n_timesteps: !!float 1e5 + n_timesteps: !!float 5e5 policy: 'MlpPolicy' - learning_rate: !!float 3e-3 - buffer_size: 50000 - batch_size: 32 - ent_coef: 0.2 + batch_size: 256 learning_starts: 1000 BipedalWalker-v2: @@ -53,7 +49,7 @@ HalfCheetahBulletEnv-v0: learning_rate: lin_3e-4 buffer_size: 1000000 batch_size: 64 - ent_coef: 0.01 + ent_coef: 'auto' train_freq: 1 gradient_steps: 1 learning_starts: 10000 @@ -103,12 +99,13 @@ ReacherBulletEnv-v0: learning_starts: 1000 HumanoidBulletEnv-v0: + normalize: "{'norm_obs': True, 'norm_reward': False}" n_timesteps: !!float 2e7 policy: 'CustomSACPolicy' learning_rate: lin_3e-4 buffer_size: 1000000 batch_size: 64 - ent_coef: 0.01 + ent_coef: 'auto' train_freq: 1 gradient_steps: 1 learning_starts: 1000 @@ -135,26 +132,29 @@ InvertedPendulumSwingupBulletEnv-v0: gradient_steps: 1 learning_starts: 1000 +# To be tuned MinitaurBulletEnv-v0: normalize: "{'norm_obs': True, 'norm_reward': False}" n_timesteps: !!float 1e6 policy: 'CustomSACPolicy' learning_rate: lin_3e-4 buffer_size: 1000000 - batch_size: 256 - ent_coef: 0.05 + batch_size: 64 + ent_coef: 'auto' + # ent_coef: 0.0003 train_freq: 1 gradient_steps: 1 learning_starts: 1000 +# To be tuned MinitaurBulletDuckEnv-v0: - normalize: "{'norm_obs': True, 'norm_reward': False}" + # normalize: "{'norm_obs': True, 'norm_reward': False}" n_timesteps: !!float 1e6 policy: 'CustomSACPolicy' learning_rate: lin_3e-4 buffer_size: 1000000 batch_size: 256 - ent_coef: 0.05 + ent_coef: 'auto' train_freq: 1 gradient_steps: 1 learning_starts: 1000 diff --git a/trained_agents/sac/LunarLanderContinuous-v2.pkl b/trained_agents/sac/LunarLanderContinuous-v2.pkl index ba5c670..1f96385 100644 Binary files a/trained_agents/sac/LunarLanderContinuous-v2.pkl and b/trained_agents/sac/LunarLanderContinuous-v2.pkl differ diff --git a/trained_agents/sac/LunarLanderContinuous-v2/config.yml b/trained_agents/sac/LunarLanderContinuous-v2/config.yml index 93bf187..26c3f63 100644 --- a/trained_agents/sac/LunarLanderContinuous-v2/config.yml +++ b/trained_agents/sac/LunarLanderContinuous-v2/config.yml @@ -1,8 +1,5 @@ !!python/object/apply:collections.OrderedDict -- - [batch_size, 32] - - [buffer_size, 50000] - - [ent_coef, 0.2] - - [learning_rate, 0.003] +- - [batch_size, 256] - [learning_starts, 1000] - - [n_timesteps, 100000.0] + - [n_timesteps, 500000.0] - [policy, MlpPolicy]