diff --git a/.travis.yml b/.travis.yml
index 44884a8..b6bc993 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,11 +5,28 @@ python:
 notifications:
   email: false
 
+env:
+  global:
+  - DOCKER_IMAGE=araffin/rl-baselines-zoo-cpu:v2.8.0
+
 services:
   - docker
 
 install:
-  - docker pull araffin/rl-baselines-zoo-cpu
+  - docker pull ${DOCKER_IMAGE}
 
 script:
-  - docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/stable-baselines,type=bind araffin/rl-baselines-zoo-cpu bash -c "cd /root/code/stable-baselines/ && pip install --upgrade git+https://github.com/pfnet/optuna.git && python -m pytest --cov-config .coveragerc --cov-report term --cov=. -v tests/"
+  - ./scripts/run_tests_travis.sh "${TEST_GLOB}"
+
+jobs:
+  include:
+    # Split test suite to avoid exceeding travis limit
+    - stage: Test
+      name: "Unit Tests Train"
+      env: TEST_GLOB="train.py"
+
+    - name: "Unit Tests Enjoy"
+      env: TEST_GLOB="enjoy.py"
+
+    - name: "Unit Tests Hyperparams opt"
+      env: TEST_GLOB="hyperparams_opt.py"
diff --git a/README.md b/README.md
index 6fe9706..42bb212 100644
--- a/README.md
+++ b/README.md
@@ -62,14 +62,14 @@ mpirun -n 16 python train.py --algo trpo --env BreakoutNoFrameskip-v4
 
 We use [Optuna](https://optuna.org/) for optimizing the hyperparameters.
 
-Note: hyperparameters search is only implemented for PPO2/A2C/SAC/TRPO/DDPG for now.
+Note: hyperparameter search is not implemented for ACER and DQN for now.
 
 when using SuccessiveHalvingPruner ("halving"), you must specify `--n-jobs > 1`
 
 Budget of 1000 trials with a maximum of 50000 steps:
 
 ```
 python train.py --algo ppo2 --env MountainCar-v0 -n 50000 -optimize --n-trials 1000 --n-jobs 2 \
-  --sampler random --pruner median
+  --sampler tpe --pruner median
 ```
@@ -116,7 +116,7 @@ Additional Atari Games (to be completed):
 |----------|--------------|----------------|------------|--------------|--------------------------|
 | A2C | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | ACER | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | N/A | N/A |
-| ACKTR | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | N/A | N/A |
+| ACKTR | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | PPO2 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | DQN | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | N/A | N/A |
 | DDPG | N/A | N/A | N/A | :heavy_check_mark: | :heavy_check_mark: |
@@ -129,15 +129,15 @@
 | RL Algo | BipedalWalker-v2 | LunarLander-v2 | LunarLanderContinuous-v2 | BipedalWalkerHardcore-v2 | CarRacing-v0 |
 |----------|--------------|----------------|------------|--------------|--------------------------|
-| A2C | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | |
+| A2C | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | |
 | ACER | N/A | :heavy_check_mark: | N/A | N/A | N/A |
-| ACKTR | N/A | :heavy_check_mark: | N/A | N/A | N/A |
-| PPO2 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | |
+| ACKTR | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | |
+| PPO2 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | |
 | DQN | N/A | :heavy_check_mark: | N/A | N/A | N/A |
-| DDPG | :heavy_check_mark: | N/A | :heavy_check_mark: | | |
+| DDPG | :heavy_check_mark: | N/A | :heavy_check_mark: | | |
 | SAC | :heavy_check_mark: | N/A | :heavy_check_mark: | :heavy_check_mark: | |
-| TD3 | | N/A | :heavy_check_mark: | | |
-| TRPO | | :heavy_check_mark: | :heavy_check_mark: | | |
+| TD3 | :heavy_check_mark: | N/A | :heavy_check_mark: | | |
+| TRPO | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | |
 
 ### PyBullet Environments
 
@@ -149,6 +149,7 @@ Note: those environments are derived from [Roboschool](https://github.com/openai
 | RL Algo | Walker2D | HalfCheetah | Ant | Reacher | Hopper | Humanoid |
 |----------|-----------|-------------|-----|---------|---------|----------|
 | A2C | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | :heavy_check_mark: | |
+| ACKTR | | :heavy_check_mark: | | | | |
 | PPO2 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | DDPG | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | | |
 | SAC | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
@@ -160,6 +161,7 @@ PyBullet Envs (Continued)
 | RL Algo | Minitaur | MinitaurDuck | InvertedDoublePendulum | InvertedPendulumSwingup |
 |----------|-----------|-------------|-----|---------|
 | A2C | | | | |
+| ACKTR | | | | |
 | PPO2 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
 | DDPG | | | | |
 | SAC | | | :heavy_check_mark: | :heavy_check_mark: |
@@ -209,11 +211,11 @@ You can train agents online using [colab notebook](https://colab.research.google
 
 ### Stable-Baselines PyPi Package
 
-Min version: stable-baselines >= 2.7.0
+Min version: stable-baselines[mpi] >= 2.8.0
 
 ```
 apt-get install swig cmake libopenmpi-dev zlib1g-dev ffmpeg
-pip install stable-baselines box2d box2d-kengz pyyaml pybullet optuna pytablewriter scikit-optimize
+pip install stable-baselines[mpi] box2d box2d-kengz pyyaml pybullet optuna pytablewriter scikit-optimize
 ```
 
 Please see [Stable Baselines README](https://github.com/hill-a/stable-baselines) for alternatives.
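Note on the new README defaults above: `--sampler tpe --pruner median` maps onto Optuna's TPE sampler combined with median pruning. A minimal sketch of that pairing using Optuna's public API (the toy objective below is a stand-in for illustration only; the zoo builds its real objective around agent training in `utils/hyperparams_opt.py`):

```python
# Sketch of the sampler/pruner pair now used by default (TPE + median pruning).
# Only the Optuna classes are taken from the library; the objective is hypothetical.
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner


def toy_objective(trial):
    # The zoo's real objective trains an agent and reports intermediate
    # evaluation rewards so that MedianPruner can stop unpromising trials early.
    lr = trial.suggest_loguniform('lr', 1e-5, 1)
    return (lr - 0.01) ** 2


study = optuna.create_study(sampler=TPESampler(), pruner=MedianPruner())
study.optimize(toy_objective, n_trials=10)
print(study.best_params)
```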
diff --git a/benchmark.md b/benchmark.md
index 8fdf98d..4edc701 100644
--- a/benchmark.md
+++ b/benchmark.md
@@ -34,12 +34,18 @@
 |acer |SpaceInvadersNoFrameskip-v4 | 542.556| 172.332| 150374| 133|
 |acktr|Acrobot-v1 | -91.284| 32.515| 149959| 1625|
 |acktr|BeamRiderNoFrameskip-v4 | 3760.976| 1826.059| 147414| 41|
+|acktr|BipedalWalker-v2 | 292.419| 54.373| 149881| 216|
+|acktr|BipedalWalkerHardcore-v2 | 44.796| 113.898| 149216| 129|
 |acktr|BreakoutNoFrameskip-v4 | 448.514| 88.882| 143118| 37|
 |acktr|CartPole-v1 | 487.573| 63.866| 149685| 307|
 |acktr|EnduroNoFrameskip-v4 | 0.000| 0.000| 149574| 45|
+|acktr|HalfCheetahBulletEnv-v0 | 2535.255| 110.368| 150000| 150|
 |acktr|LunarLander-v2 | 96.822| 64.020| 149905| 176|
+|acktr|LunarLanderContinuous-v2 | 239.953| 58.406| 149825| 480|
 |acktr|MountainCar-v0 | -111.917| 21.422| 149969| 1340|
+|acktr|MountainCarContinuous-v0 | 93.779| 0.115| 149993| 2265|
 |acktr|MsPacmanNoFrameskip-v4 | 1598.776| 264.338| 149588| 147|
+|acktr|Pendulum-v0 | -213.831| 137.857| 150000| 750|
 |acktr|PongNoFrameskip-v4 | 19.224| 3.697| 147753| 67|
 |acktr|QbertNoFrameskip-v4 | 9569.575| 3980.468| 150896| 106|
 |acktr|SeaquestNoFrameskip-v4 | 1672.239| 105.092| 149148| 67|
@@ -104,6 +110,7 @@
 |sac |ReacherBulletEnv-v0 | 17.529| 9.860| 150000| 1000|
 |sac |Walker2DBulletEnv-v0 | 2052.646| 13.631| 150000| 150|
 |td3 |AntBulletEnv-v0 | 3269.021| 60.697| 150000| 150|
+|td3 |BipedalWalker-v2 | 308.793| 23.750| 149713| 228|
 |td3 |HalfCheetahBulletEnv-v0 | 3160.318| 15.284| 150000| 150|
 |td3 |HopperBulletEnv-v0 | 2743.910| 20.159| 150000| 150|
 |td3 |HumanoidBulletEnv-v0 | 1638.081| 801.594| 149453| 182|
diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu
index fdcf11d..620cb96 100644
--- a/docker/Dockerfile.cpu
+++ b/docker/Dockerfile.cpu
@@ -22,7 +22,7 @@ RUN \
     pip install pytest-cov && \
     pip install pyyaml && \
     pip install box2d-py==2.3.5 && \
-    pip install stable-baselines && \
+    pip install stable-baselines[mpi]==2.8.0 && \
     pip install pybullet && \
     pip install gym-minigrid && \
     pip install scikit-optimize && \
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
index cbed120..47fd9b3 100644
--- a/docker/Dockerfile.gpu
+++ b/docker/Dockerfile.gpu
@@ -22,7 +22,7 @@ RUN \
     pip install pyyaml && \
     pip install box2d-py==2.3.5 && \
    pip install tensorflow-gpu==1.8.0 && \
-    pip install stable-baselines && \
+    pip install stable-baselines[mpi]==2.8.0 && \
     pip install pybullet && \
     pip install gym-minigrid && \
     pip install scikit-optimize && \
diff --git a/enjoy.py b/enjoy.py
index 7b8e421..d0d4062 100644
--- a/enjoy.py
+++ b/enjoy.py
@@ -73,10 +73,18 @@ def main():
     else:
         log_path = os.path.join(folder, algo)
 
-    model_path = "{}/{}.pkl".format(log_path, env_id)
 
     assert os.path.isdir(log_path), "The {} folder was not found".format(log_path)
-    assert os.path.isfile(model_path), "No model found for {} on {}, path: {}".format(algo, env_id, model_path)
+
+    found = False
+    for ext in ['pkl', 'zip']:
+        model_path = "{}/{}.{}".format(log_path, env_id, ext)
+        found = os.path.isfile(model_path)
+        if found:
+            break
+
+    if not found:
+        raise ValueError("No model found for {} on {}, path: {}".format(algo, env_id, model_path))
 
     if algo in ['dqn', 'ddpg', 'sac', 'td3']:
         args.n_envs = 1
diff --git a/hyperparams/acktr.yml b/hyperparams/acktr.yml
index f22435e..3e605af 100644
--- a/hyperparams/acktr.yml
+++ b/hyperparams/acktr.yml
@@ -32,3 +32,105 @@ Acrobot-v1:
   n_timesteps: !!float 5e5
   policy: 'MlpPolicy'
   ent_coef: 0.0
+
+Pendulum-v0:
+  n_envs: 4
+  n_timesteps: !!float 2e6
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+  gamma: 0.99
+  n_steps: 16
+  learning_rate: 0.06
+  lr_schedule: 'constant'
+
+LunarLanderContinuous-v2:
+  normalize: true
+  n_envs: 8
+  n_timesteps: !!float 5e6
+  policy: 'MlpPolicy'
+  gamma: 0.99
+  n_steps: 16
+  ent_coef: 0.0
+  learning_rate: 0.06
+  lr_schedule: 'constant'
+
+MountainCarContinuous-v0:
+  normalize: true
+  n_envs: 16
+  n_timesteps: !!float 3e5
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+
+# Tuned
+HalfCheetahBulletEnv-v0:
+  env_wrapper: utils.wrappers.TimeFeatureWrapper
+  normalize: True
+  n_envs: 1
+  n_timesteps: !!float 2e6
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+  lr_schedule: 'constant'
+  learning_rate: 0.0217
+  n_steps: 128
+  nprocs: 4
+  max_grad_norm: 0.5
+  gamma: 0.98
+  vf_coef: 0.946
+
+# TO BE tuned
+Walker2DBulletEnv-v0:
+  env_wrapper: utils.wrappers.TimeFeatureWrapper
+  normalize: True
+  n_envs: 1
+  n_timesteps: !!float 2e6
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+  # lr_schedule: 'constant'
+  # learning_rate: 0.0217
+  n_steps: 128
+  nprocs: 4
+  gamma: 0.99
+  vf_coef: 0.946
+
+
+HalfCheetah-v2:
+  env_wrapper: utils.wrappers.TimeFeatureWrapper
+  normalize: True
+  n_envs: 1
+  n_timesteps: !!float 1e6
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+  lr_schedule: 'constant'
+  learning_rate: 0.2
+  n_steps: 2048
+  nprocs: 4
+  max_grad_norm: 10
+  gamma: 0.99
+  vf_coef: 0.5
+  policy_kwargs: "dict(net_arch=[256, 256])"
+
+# Tuned
+BipedalWalkerHardcore-v2:
+  normalize: true
+  n_envs: 8
+  n_timesteps: !!float 10e7
+  policy: 'MlpPolicy'
+  ent_coef: 0.000125
+  lr_schedule: 'constant'
+  learning_rate: 0.0675
+  n_steps: 16
+  gamma: 0.9999
+  vf_coef: 0.51
+
+# Tuned
+BipedalWalker-v2:
+  normalize: true
+  n_envs: 8
+  n_timesteps: !!float 5e6
+  policy: 'MlpPolicy'
+  ent_coef: 0.0
+  lr_schedule: 'constant'
+  learning_rate: 0.298
+  n_steps: 32
+  gamma: 0.98
+  vf_coef: 0.38
diff --git a/hyperparams/td3.yml b/hyperparams/td3.yml
index f877ab2..77ec58b 100644
--- a/hyperparams/td3.yml
+++ b/hyperparams/td3.yml
@@ -49,6 +49,20 @@ HalfCheetahBulletEnv-v0:
   gradient_steps: 1000
   policy_kwargs: "dict(layers=[400, 300])"
 
+BipedalWalker-v2:
+  n_timesteps: !!float 2e6
+  policy: 'MlpPolicy'
+  gamma: 0.99
+  buffer_size: 1000000
+  noise_type: 'normal'
+  noise_std: 0.1
+  learning_starts: 10000
+  batch_size: 100
+  learning_rate: !!float 1e-3
+  train_freq: 1000
+  gradient_steps: 1000
+  policy_kwargs: "dict(layers=[400, 300])"
+
 # To be tuned
 BipedalWalkerHardcore-v2:
   n_timesteps: !!float 5e7
@@ -59,7 +73,7 @@ BipedalWalkerHardcore-v2:
   noise_std: 0.2
   learning_starts: 10000
   batch_size: 100
-  learning_rate: 1e-3
+  learning_rate: !!float 1e-3
   train_freq: 1000
   gradient_steps: 1000
   policy_kwargs: "dict(layers=[400, 300])"
diff --git a/run_docker_cpu.sh b/run_docker_cpu.sh
index 902eecd..4001969 100755
--- a/run_docker_cpu.sh
+++ b/run_docker_cpu.sh
@@ -8,5 +8,5 @@
 echo $cmd_line
 
 docker run -it --rm --network host --ipc=host \
-  --mount src=$(pwd),target=/root/code/stable-baselines,type=bind araffin/rl-baselines-zoo-cpu\
+  --mount src=$(pwd),target=/root/code/stable-baselines,type=bind araffin/rl-baselines-zoo-cpu:v2.8.0\
   bash -c "cd /root/code/stable-baselines/ && $cmd_line"
diff --git a/run_docker_gpu.sh b/run_docker_gpu.sh
old mode 100644
new mode 100755
index ec6a1bb..f288224
--- a/run_docker_gpu.sh
+++ b/run_docker_gpu.sh
@@ -8,5 +8,5 @@
 echo $cmd_line
 
 docker run -it --runtime=nvidia --rm --network host --ipc=host \
-  --mount src=$(pwd),target=/root/code/stable-baselines,type=bind araffin/rl-baselines-zoo\
+  --mount src=$(pwd),target=/root/code/stable-baselines,type=bind araffin/rl-baselines-zoo:v2.8.0\
   bash -c "cd /root/code/stable-baselines/ && $cmd_line"
diff --git a/scripts/run_tests_travis.sh b/scripts/run_tests_travis.sh
new file mode 100755
index 0000000..4f01313
--- /dev/null
+++ b/scripts/run_tests_travis.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+DOCKER_CMD="docker run -it --rm --network host --ipc=host --mount src=$(pwd),target=/root/code/stable-baselines,type=bind"
+BASH_CMD="cd /root/code/stable-baselines/"
+
+if [[ $# -ne 1 ]]; then
+  echo "usage: $0 "
+  exit 1
+fi
+
+if [[ ${DOCKER_IMAGE} = "" ]]; then
+  echo "Need DOCKER_IMAGE environment variable to be set."
+  exit 1
+fi
+
+TEST_GLOB=$1
+
+set -e  # exit immediately on any error
+
+
+${DOCKER_CMD} ${DOCKER_IMAGE} \
+  bash -c "${BASH_CMD} && \
+    python -m pytest --cov-config .coveragerc --cov-report term --cov=. -v tests/test_${TEST_GLOB}"
diff --git a/tests/test_hyperparams_opt.py b/tests/test_hyperparams_opt.py
index 096c1a5..d251901 100644
--- a/tests/test_hyperparams_opt.py
+++ b/tests/test_hyperparams_opt.py
@@ -13,9 +13,9 @@ def _assert_eq(left, right):
 N_TRIALS = 2
 N_JOBS = 1
 
-ALGOS = ('ppo2', 'a2c', 'trpo')
+ALGOS = ('ppo2', 'a2c', 'trpo', 'acktr')
 # Not yet supported:
-# ALGOS = ('acer', 'acktr', 'dqn')
+# ALGOS = ('acer', 'dqn')
 ENV_IDS = ('CartPole-v1',)
 LOG_FOLDER = 'logs/tests_optimize/'
@@ -29,6 +29,8 @@ def _assert_eq(left, right):
 experiments['ddpg-MountainCarContinuous-v0'] = ('ddpg', 'MountainCarContinuous-v0')
 # Test for SAC
 experiments['sac-Pendulum-v0'] = ('sac', 'Pendulum-v0')
+# Test for TD3
+experiments['td3-Pendulum-v0'] = ('td3', 'Pendulum-v0')
 
 # Clean up
 if os.path.isdir(LOG_FOLDER):
diff --git a/train.py b/train.py
index 1fd7d34..27b3187 100644
--- a/train.py
+++ b/train.py
@@ -51,9 +51,9 @@
                         help='Run hyperparameters search')
     parser.add_argument('--n-jobs', help='Number of parallel jobs when optimizing hyperparameters', type=int, default=1)
     parser.add_argument('--sampler', help='Sampler to use when optimizing hyperparameters', type=str,
-                        default='skopt', choices=['random', 'tpe', 'skopt'])
+                        default='tpe', choices=['random', 'tpe', 'skopt'])
     parser.add_argument('--pruner', help='Pruner to use when optimizing hyperparameters', type=str,
-                        default='none', choices=['halving', 'median', 'none'])
+                        default='median', choices=['halving', 'median', 'none'])
     parser.add_argument('--verbose', help='Verbose mode (0: no output, 1: INFO)', default=1, type=int)
     parser.add_argument('--gym-packages', type=str, nargs='+', default=[], help='Additional external Gym environemnt package modules to import (e.g. gym_minigrid)')
diff --git a/trained_agents/acktr/BipedalWalker-v2.zip b/trained_agents/acktr/BipedalWalker-v2.zip
new file mode 100644
index 0000000..1cee58f
Binary files /dev/null and b/trained_agents/acktr/BipedalWalker-v2.zip differ
diff --git a/trained_agents/acktr/BipedalWalker-v2/config.yml b/trained_agents/acktr/BipedalWalker-v2/config.yml
new file mode 100644
index 0000000..22cb148
--- /dev/null
+++ b/trained_agents/acktr/BipedalWalker-v2/config.yml
@@ -0,0 +1,11 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.0]
+  - [gamma, 0.98]
+  - [learning_rate, 0.298]
+  - [lr_schedule, constant]
+  - [n_envs, 8]
+  - [n_steps, 32]
+  - [n_timesteps, 5000000.0]
+  - [normalize, true]
+  - [policy, MlpPolicy]
+  - [vf_coef, 0.38]
diff --git a/trained_agents/acktr/BipedalWalker-v2/obs_rms.pkl b/trained_agents/acktr/BipedalWalker-v2/obs_rms.pkl
new file mode 100644
index 0000000..9cb6bc1
Binary files /dev/null and b/trained_agents/acktr/BipedalWalker-v2/obs_rms.pkl differ
diff --git a/trained_agents/acktr/BipedalWalker-v2/ret_rms.pkl b/trained_agents/acktr/BipedalWalker-v2/ret_rms.pkl
new file mode 100644
index 0000000..611c4e0
Binary files /dev/null and b/trained_agents/acktr/BipedalWalker-v2/ret_rms.pkl differ
diff --git a/trained_agents/acktr/BipedalWalkerHardcore-v2.zip b/trained_agents/acktr/BipedalWalkerHardcore-v2.zip
new file mode 100644
index 0000000..057e2b7
Binary files /dev/null and b/trained_agents/acktr/BipedalWalkerHardcore-v2.zip differ
diff --git a/trained_agents/acktr/BipedalWalkerHardcore-v2/config.yml b/trained_agents/acktr/BipedalWalkerHardcore-v2/config.yml
new file mode 100644
index 0000000..c83e60f
--- /dev/null
+++ b/trained_agents/acktr/BipedalWalkerHardcore-v2/config.yml
@@ -0,0 +1,11 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.000125]
+  - [gamma, 0.9999]
+  - [learning_rate, 0.0675]
+  - [lr_schedule, constant]
+  - [n_envs, 8]
+  - [n_steps, 16]
+  - [n_timesteps, 100000000.0]
+  - [normalize, true]
+  - [policy, MlpPolicy]
+  - [vf_coef, 0.51]
diff --git a/trained_agents/acktr/BipedalWalkerHardcore-v2/obs_rms.pkl b/trained_agents/acktr/BipedalWalkerHardcore-v2/obs_rms.pkl
new file mode 100644
index 0000000..e5064df
Binary files /dev/null and b/trained_agents/acktr/BipedalWalkerHardcore-v2/obs_rms.pkl differ
diff --git a/trained_agents/acktr/BipedalWalkerHardcore-v2/ret_rms.pkl b/trained_agents/acktr/BipedalWalkerHardcore-v2/ret_rms.pkl
new file mode 100644
index 0000000..1210735
Binary files /dev/null and b/trained_agents/acktr/BipedalWalkerHardcore-v2/ret_rms.pkl differ
diff --git a/trained_agents/acktr/HalfCheetahBulletEnv-v0.zip b/trained_agents/acktr/HalfCheetahBulletEnv-v0.zip
new file mode 100644
index 0000000..f2f0b34
Binary files /dev/null and b/trained_agents/acktr/HalfCheetahBulletEnv-v0.zip differ
diff --git a/trained_agents/acktr/HalfCheetahBulletEnv-v0/config.yml b/trained_agents/acktr/HalfCheetahBulletEnv-v0/config.yml
new file mode 100644
index 0000000..ea8d97c
--- /dev/null
+++ b/trained_agents/acktr/HalfCheetahBulletEnv-v0/config.yml
@@ -0,0 +1,14 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.0]
+  - [env_wrapper, utils.wrappers.TimeFeatureWrapper]
+  - [gamma, 0.98]
+  - [learning_rate, 0.0217]
+  - [lr_schedule, constant]
+  - [max_grad_norm, 0.5]
+  - [n_envs, 1]
+  - [n_steps, 128]
+  - [n_timesteps, 2000000.0]
+  - [normalize, true]
+  - [nprocs, 4]
+  - [policy, MlpPolicy]
+  - [vf_coef, 0.946]
diff --git a/trained_agents/acktr/HalfCheetahBulletEnv-v0/obs_rms.pkl b/trained_agents/acktr/HalfCheetahBulletEnv-v0/obs_rms.pkl
new file mode 100644
index 0000000..030680a
Binary files /dev/null and b/trained_agents/acktr/HalfCheetahBulletEnv-v0/obs_rms.pkl differ
diff --git a/trained_agents/acktr/HalfCheetahBulletEnv-v0/ret_rms.pkl b/trained_agents/acktr/HalfCheetahBulletEnv-v0/ret_rms.pkl
new file mode 100644
index 0000000..d499aaf
Binary files /dev/null and b/trained_agents/acktr/HalfCheetahBulletEnv-v0/ret_rms.pkl differ
diff --git a/trained_agents/acktr/LunarLanderContinuous-v2.zip b/trained_agents/acktr/LunarLanderContinuous-v2.zip
new file mode 100644
index 0000000..8a81eaf
Binary files /dev/null and b/trained_agents/acktr/LunarLanderContinuous-v2.zip differ
diff --git a/trained_agents/acktr/LunarLanderContinuous-v2/config.yml b/trained_agents/acktr/LunarLanderContinuous-v2/config.yml
new file mode 100644
index 0000000..ea412d2
--- /dev/null
+++ b/trained_agents/acktr/LunarLanderContinuous-v2/config.yml
@@ -0,0 +1,10 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.0]
+  - [gamma, 0.99]
+  - [learning_rate, 0.06]
+  - [lr_schedule, constant]
+  - [n_envs, 8]
+  - [n_steps, 16]
+  - [n_timesteps, 5000000.0]
+  - [normalize, true]
+  - [policy, MlpPolicy]
diff --git a/trained_agents/acktr/LunarLanderContinuous-v2/obs_rms.pkl b/trained_agents/acktr/LunarLanderContinuous-v2/obs_rms.pkl
new file mode 100644
index 0000000..69efc9c
Binary files /dev/null and b/trained_agents/acktr/LunarLanderContinuous-v2/obs_rms.pkl differ
diff --git a/trained_agents/acktr/LunarLanderContinuous-v2/ret_rms.pkl b/trained_agents/acktr/LunarLanderContinuous-v2/ret_rms.pkl
new file mode 100644
index 0000000..33301f7
Binary files /dev/null and b/trained_agents/acktr/LunarLanderContinuous-v2/ret_rms.pkl differ
diff --git a/trained_agents/acktr/MountainCarContinuous-v0.zip b/trained_agents/acktr/MountainCarContinuous-v0.zip
new file mode 100644
index 0000000..8443d85
Binary files /dev/null and b/trained_agents/acktr/MountainCarContinuous-v0.zip differ
diff --git a/trained_agents/acktr/MountainCarContinuous-v0/config.yml b/trained_agents/acktr/MountainCarContinuous-v0/config.yml
new file mode 100644
index 0000000..705df27
--- /dev/null
+++ b/trained_agents/acktr/MountainCarContinuous-v0/config.yml
@@ -0,0 +1,6 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.0]
+  - [n_envs, 16]
+  - [n_timesteps, 1000000.0]
+  - [normalize, true]
+  - [policy, MlpPolicy]
diff --git a/trained_agents/acktr/MountainCarContinuous-v0/obs_rms.pkl b/trained_agents/acktr/MountainCarContinuous-v0/obs_rms.pkl
new file mode 100644
index 0000000..a67bd39
Binary files /dev/null and b/trained_agents/acktr/MountainCarContinuous-v0/obs_rms.pkl differ
diff --git a/trained_agents/acktr/MountainCarContinuous-v0/ret_rms.pkl b/trained_agents/acktr/MountainCarContinuous-v0/ret_rms.pkl
new file mode 100644
index 0000000..a37c739
Binary files /dev/null and b/trained_agents/acktr/MountainCarContinuous-v0/ret_rms.pkl differ
diff --git a/trained_agents/acktr/Pendulum-v0.zip b/trained_agents/acktr/Pendulum-v0.zip
new file mode 100644
index 0000000..1698a51
Binary files /dev/null and b/trained_agents/acktr/Pendulum-v0.zip differ
diff --git a/trained_agents/acktr/Pendulum-v0/config.yml b/trained_agents/acktr/Pendulum-v0/config.yml
new file mode 100644
index 0000000..6b40814
--- /dev/null
+++ b/trained_agents/acktr/Pendulum-v0/config.yml
@@ -0,0 +1,9 @@
+!!python/object/apply:collections.OrderedDict
+- - [ent_coef, 0.0]
+  - [gamma, 0.99]
+  - [learning_rate, 0.06]
+  - [lr_schedule, constant]
+  - [n_envs, 4]
+  - [n_steps, 16]
+  - [n_timesteps, 2000000.0]
+  - [policy, MlpPolicy]
diff --git a/trained_agents/td3/BipedalWalker-v2.zip b/trained_agents/td3/BipedalWalker-v2.zip
new file mode 100644
index 0000000..3cca4d6
Binary files /dev/null and b/trained_agents/td3/BipedalWalker-v2.zip differ
diff --git a/trained_agents/td3/BipedalWalker-v2/config.yml b/trained_agents/td3/BipedalWalker-v2/config.yml
new file mode 100644
index 0000000..418e84b
--- /dev/null
+++ b/trained_agents/td3/BipedalWalker-v2/config.yml
@@ -0,0 +1,13 @@
+!!python/object/apply:collections.OrderedDict
+- - [batch_size, 100]
+  - [buffer_size, 1000000]
+  - [gamma, 0.99]
+  - [gradient_steps, 1000]
+  - [learning_rate, 0.001]
+  - [learning_starts, 10000]
+  - [n_timesteps, 2000000.0]
+  - [noise_std, 0.1]
+  - [noise_type, normal]
+  - [policy, MlpPolicy]
+  - [policy_kwargs, 'dict(layers=[400, 300])']
+  - [train_freq, 1000]
diff --git a/utils/benchmark.py b/utils/benchmark.py
index f1f4b2c..4603f07 100644
--- a/utils/benchmark.py
+++ b/utils/benchmark.py
@@ -45,7 +45,7 @@
     algo, env_id = trained_models[trained_model]
     n_envs = args.n_envs
     n_timesteps = args.n_timesteps
 
-    if algo in ['dqn', 'ddpg', 'sac']:
+    if algo in ['dqn', 'ddpg', 'sac', 'td3']:
         n_envs = 1
         n_timesteps *= args.n_envs
diff --git a/utils/hyperparams_opt.py b/utils/hyperparams_opt.py
index 386c40a..758ca71 100644
--- a/utils/hyperparams_opt.py
+++ b/utils/hyperparams_opt.py
@@ -5,7 +5,7 @@
 from optuna.pruners import SuccessiveHalvingPruner, MedianPruner
 from optuna.samplers import RandomSampler, TPESampler
 from optuna.integration.skopt import SkoptSampler
-from stable_baselines import SAC, DDPG
+from stable_baselines import SAC, DDPG, TD3
 from stable_baselines.ddpg import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
 from stable_baselines.common.vec_env import VecNormalize, VecEnv
 from stable_baselines.her import HERGoalEnvWrapper
@@ -77,8 +77,8 @@ def objective(trial):
         if algo == 'her':
             trial.model_class = hyperparams['model_class']
 
-        # Hack to use DDPG sampler
-        if algo == 'ddpg' or trial.model_class == 'ddpg':
+        # Hack to use DDPG/TD3 noise sampler
+        if algo in ['ddpg', 'td3'] or trial.model_class in ['ddpg', 'td3']:
             trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
         kwargs.update(algo_sampler(trial))
@@ -237,7 +237,7 @@ def sample_a2c_params(trial):
    :return: (dict)
    """
    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
-    n_steps = trial.suggest_categorical('n_steps', [5, 16, 32, 64, 128, 256])
+    n_steps = trial.suggest_categorical('n_steps', [8, 16, 32, 64, 128, 256, 512, 1024, 2048])
    lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    ent_coef = trial.suggest_loguniform('ent_coef', 0.00000001, 0.1)
@@ -254,6 +254,29 @@
        'vf_coef': vf_coef
    }
 
+def sample_acktr_params(trial):
+    """
+    Sampler for ACKTR hyperparams.
+
+    :param trial: (optuna.trial)
+    :return: (dict)
+    """
+    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
+    n_steps = trial.suggest_categorical('n_steps', [16, 32, 64, 128, 256, 512, 1024, 2048])
+    lr_schedule = trial.suggest_categorical('lr_schedule', ['linear', 'constant'])
+    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
+    ent_coef = trial.suggest_loguniform('ent_coef', 0.00000001, 0.1)
+    vf_coef = trial.suggest_uniform('vf_coef', 0, 1)
+
+    return {
+        'n_steps': n_steps,
+        'gamma': gamma,
+        'learning_rate': learning_rate,
+        'lr_schedule': lr_schedule,
+        'ent_coef': ent_coef,
+        'vf_coef': vf_coef
+    }
+
 
 def sample_sac_params(trial):
     """
@@ -270,8 +293,7 @@
     train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
     # gradient_steps takes too much time
     # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
-    # gradient_steps = 1
-    gradient_steps = trial.suggest_categorical('gradient_steps', [1, 2, 5])
+    gradient_steps = train_freq
     ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
     target_entropy = 'auto'
@@ -290,6 +312,39 @@
         'target_entropy': target_entropy
     }
 
+def sample_td3_params(trial):
+    """
+    Sampler for TD3 hyperparams.
+
+    :param trial: (optuna.trial)
+    :return: (dict)
+    """
+    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
+    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
+    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 100, 128, 256, 512])
+    buffer_size = trial.suggest_categorical('buffer_size', [int(1e4), int(1e5), int(1e6)])
+    train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 1000, 2000])
+    gradient_steps = train_freq
+    noise_type = trial.suggest_categorical('noise_type', ['ornstein-uhlenbeck', 'normal'])
+    noise_std = trial.suggest_uniform('noise_std', 0, 1)
+
+    hyperparams = {
+        'gamma': gamma,
+        'learning_rate': learning_rate,
+        'batch_size': batch_size,
+        'buffer_size': buffer_size,
+        'train_freq': train_freq,
+        'gradient_steps': gradient_steps,
+    }
+
+    if noise_type == 'normal':
+        hyperparams['action_noise'] = NormalActionNoise(mean=np.zeros(trial.n_actions),
+                                                        sigma=noise_std * np.ones(trial.n_actions))
+    elif noise_type == 'ornstein-uhlenbeck':
+        hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(trial.n_actions),
+                                                                   sigma=noise_std * np.ones(trial.n_actions))
+
+    return hyperparams
 
 def sample_trpo_params(trial):
     """
@@ -375,6 +430,8 @@ def sample_her_params(trial):
         hyperparams = sample_sac_params(trial)
     elif trial.model_class == DDPG:
         hyperparams = sample_ddpg_params(trial)
+    elif trial.model_class == TD3:
+        hyperparams = sample_td3_params(trial)
 
     hyperparams['random_exploration'] = trial.suggest_uniform('random_exploration', 0, 1)
     hyperparams['n_sampled_goal'] = trial.suggest_categorical('n_sampled_goal', [1, 2, 4, 6, 8])
@@ -388,5 +445,7 @@
     'a2c': sample_a2c_params,
     'trpo': sample_trpo_params,
     'ddpg': sample_ddpg_params,
-    'her': sample_her_params
+    'her': sample_her_params,
+    'acktr': sample_acktr_params,
+    'td3': sample_td3_params
 }
diff --git a/utils/utils.py b/utils/utils.py
index e193eed..419ea18 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -251,10 +251,11 @@ def get_trained_models(log_folder):
     algos = os.listdir(log_folder)
     trained_models = {}
     for algo in algos:
-        for env_id in glob.glob('{}/{}/*.pkl'.format(log_folder, algo)):
-            # Retrieve env name
-            env_id = env_id.split('/')[-1].split('.pkl')[0]
-            trained_models['{}-{}'.format(algo, env_id)] = (algo, env_id)
+        for ext in ['zip', 'pkl']:
+            for env_id in glob.glob('{}/{}/*.{}'.format(log_folder, algo, ext)):
+                # Retrieve env name
+                env_id = env_id.split('/')[-1].split('.{}'.format(ext))[0]
+                trained_models['{}-{}'.format(algo, env_id)] = (algo, env_id)
     return trained_models
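The `enjoy.py` and `utils/utils.py` hunks implement the same idea: look for the `.zip` save format introduced in stable-baselines 2.8.0 first, then fall back to the legacy `.pkl`. A self-contained sketch of that lookup (hypothetical helper, not part of the zoo):

```python
import os


def find_saved_model(log_path, env_id):
    """Return the path of a saved agent, preferring the .zip format
    used by stable-baselines >= 2.8.0 over the legacy .pkl format."""
    for ext in ('zip', 'pkl'):
        candidate = os.path.join(log_path, '{}.{}'.format(env_id, ext))
        if os.path.isfile(candidate):
            return candidate
    raise ValueError('No model found for {} in {}'.format(env_id, log_path))


# Example: find_saved_model('trained_agents/acktr', 'BipedalWalker-v2')
```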
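Each new pretrained agent ships as a `.zip` model plus `obs_rms.pkl`/`ret_rms.pkl` statistics for `VecNormalize`. Loading one by hand looks roughly like the sketch below (normally `enjoy.py --algo acktr --env BipedalWalker-v2` does this for you; the stable-baselines 2.8.0 calls are assumptions based on its documented API, not taken from this diff):

```python
import gym
from stable_baselines import ACKTR
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

# Wrap the env the same way it was wrapped during training (normalize: true).
env = DummyVecEnv([lambda: gym.make('BipedalWalker-v2')])
env = VecNormalize(env, training=False, norm_reward=False)
env.load_running_average('trained_agents/acktr/BipedalWalker-v2')  # reads obs_rms.pkl / ret_rms.pkl

model = ACKTR.load('trained_agents/acktr/BipedalWalker-v2.zip')

obs = env.reset()
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)
```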