From 24b26439fcedd0ff787c46dde861c32a9be27460 Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Wed, 12 Aug 2020 08:21:57 +0200
Subject: [PATCH 1/3] some fixes for the leap net encoded

---
 .../LeapNetEncoded/LeapNetEncoded_NN.py       |  2 +-
 .../LeapNetEncoded/LeapNetEncoded_NNParam.py  |  9 +++
 l2rpn_baselines/LeapNetEncoded/study.py       |  8 +--
 l2rpn_baselines/LeapNetEncoded/train.py       |  5 +-
 l2rpn_baselines/utils/NNParam.py              | 70 ++++++++++---------
 5 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
index e30a0bc..05f75c6 100644
--- a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
+++ b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
@@ -243,7 +243,7 @@ def predict_movement(self, data, epsilon, batch_size=None, training=False):
         if batch_size is None:
             batch_size = data.shape[0]
         data_nn, true_output_grid = self._make_x_tau(data)
-        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=False)
+        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=training)
         return res
 
     def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None):
diff --git a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
index f669770..ee4a300 100644
--- a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
+++ b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
@@ -175,3 +175,12 @@ def center_reduce(self, env):
         self._center_reduce_vect(env.get_obs(), "tau")
         self._center_reduce_vect(env.get_obs(), "gm_out")
         self._center_reduce_vect(env.get_obs(), "input_q")
+
+    def _get_adds_mults_from_name(self, obs, attr_nm):
+        add_tmp, mult_tmp = super()._get_adds_mults_from_name(obs, attr_nm)
+        if attr_nm in ["line_status"]:
+            # transform line_status into (1. - line_status) [similar to the leap net papers]
+            # 0 powerline is connected, 1 powerline is NOT connected
+            add_tmp = -1.0
+            mult_tmp = -1.0
+        return add_tmp, mult_tmp
diff --git a/l2rpn_baselines/LeapNetEncoded/study.py b/l2rpn_baselines/LeapNetEncoded/study.py
index fe7a958..9dece75 100644
--- a/l2rpn_baselines/LeapNetEncoded/study.py
+++ b/l2rpn_baselines/LeapNetEncoded/study.py
@@ -56,10 +56,10 @@ def study(env,
     # Run
     # Create agent
     agent = LeapNetEncoded(action_space=env.action_space,
-                         name=name,
-                         store_action=nb_process == 1,
-                         nn_archi=nn_archi,
-                         observation_space=env.observation_space)
+                           name=name,
+                           store_action=nb_process == 1,
+                           nn_archi=nn_archi,
+                           observation_space=env.observation_space)
 
     # Load weights from file
     agent.load(load_path)
diff --git a/l2rpn_baselines/LeapNetEncoded/train.py b/l2rpn_baselines/LeapNetEncoded/train.py
index 5d74e13..9ca8177 100755
--- a/l2rpn_baselines/LeapNetEncoded/train.py
+++ b/l2rpn_baselines/LeapNetEncoded/train.py
@@ -389,7 +389,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                      "day_of_week",
                      "hour_of_day",
                      "minute_of_hour",
-                     "rho"]
+                     "rho",
+                     ]
 
     li_attr_obs_Tau = ["line_status", "timestep_overflow"]
     list_attr_gm_out = ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q", "load_v"] + li_attr_obs_X
@@ -405,7 +406,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
 
                     'dim_topo': env_init.dim_topo,
 
-                    "sizes_enc": (50, 50, 50, 50),
+                    "sizes_enc": (50, 50, ),
                     "sizes_main": (300, 300, 300),
                     "sizes_out_gm": (100, ),
                     "sizes_Qnet": (200, 200, 200)
diff --git a/l2rpn_baselines/utils/NNParam.py b/l2rpn_baselines/utils/NNParam.py
index 8294d9a..3b9e350 100644
--- a/l2rpn_baselines/utils/NNParam.py
+++ b/l2rpn_baselines/utils/NNParam.py
@@ -224,6 +224,42 @@ def center_reduce(self, env):
         # TODO see TestLeapNet for this feature
         self._center_reduce_vect(env.get_obs(), "x")
 
+    def _get_adds_mults_from_name(self, obs, attr_nm):
+        if attr_nm in ["prod_p"]:
+            add_tmp = np.array([-0.5 * (pmax + pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+            mult_tmp = np.array([1. / max((pmax - pmin), 0.) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+        elif attr_nm in ["prod_q"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / max(abs(val), 1.0) for val in obs.prod_q])
+        elif attr_nm in ["load_p", "load_q"]:
+            add_tmp = np.array([-val for val in getattr(obs, attr_nm)])
+            mult_tmp = 0.5
+        elif attr_nm in ["load_v", "prod_v", "v_or", "v_ex"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / val for val in getattr(obs, attr_nm)])
+        elif attr_nm == "hour_of_day":
+            add_tmp = -12.
+            mult_tmp = 1.0 / 12
+        elif attr_nm == "minute_of_hour":
+            add_tmp = -30.
+            mult_tmp = 1.0 / 30
+        elif attr_nm == "day_of_week":
+            add_tmp = -4.
+            mult_tmp = 1.0 / 4
+        elif attr_nm == "day":
+            add_tmp = -15.
+            mult_tmp = 1.0 / 15.
+        elif attr_nm in ["target_dispatch", "actual_dispatch"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / (pmax - pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+        elif attr_nm in ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1.0 / max(val, 1.0) for val in getattr(obs, attr_nm)])
+        else:
+            add_tmp = 0.
+ mult_tmp = 1.0 + return add_tmp, mult_tmp + def _center_reduce_vect(self, obs, nn_part): """ compute the xxxx_adds and xxxx_mults for one part of the neural network called nn_part, @@ -238,39 +274,7 @@ def _center_reduce_vect(self, obs, nn_part): adds = [] mults = [] for attr_nm in li_attr_obs: - if attr_nm in ["prod_p"]: - add_tmp = np.array([-0.5*(pmax + pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - mult_tmp = np.array([1./max((pmax - pmin), 0.) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - elif attr_nm in ["prod_q"]: - add_tmp = 0. - mult_tmp = np.array([1./max(abs(val), 1.0) for val in obs.prod_q]) - elif attr_nm in ["load_p", "load_q"]: - add_tmp = np.array([-val for val in getattr(obs, attr_nm)]) - mult_tmp = 0.5 - elif attr_nm in ["load_v", "prod_v", "v_or", "v_ex"]: - add_tmp = 0. - mult_tmp = np.array([1. / val for val in getattr(obs, attr_nm)]) - elif attr_nm == "hour_of_day": - add_tmp = -12. - mult_tmp = 1.0/12 - elif attr_nm == "minute_of_hour": - add_tmp = -30. - mult_tmp = 1.0/30 - elif attr_nm == "day_of_week": - add_tmp = -4. - mult_tmp = 1.0/4 - elif attr_nm == "day": - add_tmp = -15. - mult_tmp = 1.0/15. - elif attr_nm in ["target_dispatch", "actual_dispatch"]: - add_tmp = 0. - mult_tmp = np.array([1./(pmax - pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - elif attr_nm in ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex"]: - add_tmp = 0. - mult_tmp = np.array([1.0 / max(val, 1.0) for val in getattr(obs, attr_nm)]) - else: - add_tmp = 0. - mult_tmp = 1.0 + add_tmp, mult_tmp = self._get_adds_mults_from_name(obs, attr_nm) mults.append(mult_tmp) adds.append(add_tmp) setattr(self, "{}_adds".format(nn_part), adds) From 84d1e5df9e72fa584f5f37c19e3221f0ad2f1e18 Mon Sep 17 00:00:00 2001 From: BDonnot Date: Tue, 18 Aug 2020 15:10:06 +0200 Subject: [PATCH 2/3] removing the deprecated SAC baselines --- docs/SAC.rst | 44 --- docs/conf.py | 4 +- docs/index.rst | 1 - l2rpn_baselines/SAC/SAC.py | 18 -- l2rpn_baselines/SAC/SAC_NN.py | 281 ------------------- l2rpn_baselines/SAC/SAC_NNParam.py | 65 ----- l2rpn_baselines/SAC/__init__.py | 11 - l2rpn_baselines/SAC/evaluate.py | 204 -------------- l2rpn_baselines/SAC/train.py | 347 ------------------------ l2rpn_baselines/SACOld/SACOld.py | 2 +- l2rpn_baselines/__init__.py | 3 +- l2rpn_baselines/test/test_import.py | 5 - l2rpn_baselines/test/test_train_eval.py | 58 ---- setup.py | 2 +- 14 files changed, 5 insertions(+), 1040 deletions(-) delete mode 100644 docs/SAC.rst delete mode 100644 l2rpn_baselines/SAC/SAC.py delete mode 100644 l2rpn_baselines/SAC/SAC_NN.py delete mode 100644 l2rpn_baselines/SAC/SAC_NNParam.py delete mode 100644 l2rpn_baselines/SAC/__init__.py delete mode 100644 l2rpn_baselines/SAC/evaluate.py delete mode 100755 l2rpn_baselines/SAC/train.py diff --git a/docs/SAC.rst b/docs/SAC.rst deleted file mode 100644 index 688dc0f..0000000 --- a/docs/SAC.rst +++ /dev/null @@ -1,44 +0,0 @@ -SAC: Soft Actor Critic -========================= - -This baseline comes from the paper: -`Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor `_ - -**NB** This version is a new implementation of the SAC baselines. We recommend you to start using -it in new projects. The old version had some issues. Out of backward compatibility, it is still -available under the name "SACOld". - -Description ------------ -This module proposes an implementation of the SAC algorithm. - -An example to train this model is available in the train function :ref:`Example-sac`. 
- -Exported class --------------- -You can use this class with: - -.. code-block:: python - - from l2rpn_baselines.SAC import train, evaluate, SAC - -.. automodule:: l2rpn_baselines.SAC - :members: - :autosummary: - -Other non exported class ------------------------- -These classes need to be imported, if you want to import them with (non exhaustive list): -.. code-block:: python - - from l2rpn_baselines.SAC.SAC_NN import SAC_NN - from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam - - -.. autoclass:: l2rpn_baselines.SAC.SAC_NN.SAC_NN - :members: - :autosummary: - -.. autoclass:: l2rpn_baselines.SAC.SAC_NNParam.SAC_NNParam - :members: - :autosummary: diff --git a/docs/conf.py b/docs/conf.py index 113d3f2..5e36f9a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,8 +22,8 @@ author = 'Benjamin DONNOT' # The full version, including alpha/beta/rc tags -release = '0.4.4' -version = '0.4' +release = '0.5.0' +version = '0.5' # -- General configuration --------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index cc7641c..c2c53d5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,7 +28,6 @@ Baseline already Available DeepQSimple DoubleDuelingDQN DuelQSimple - SAC More advanced baselines diff --git a/l2rpn_baselines/SAC/SAC.py b/l2rpn_baselines/SAC/SAC.py deleted file mode 100644 index f3619d0..0000000 --- a/l2rpn_baselines/SAC/SAC.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -from l2rpn_baselines.utils import DeepQAgent -from l2rpn_baselines.SAC.SAC_NN import SAC_NN -DEFAULT_NAME = "SAC" - - -class SAC(DeepQAgent): - """ - This is the :class:`l2rpn_baselines.utils` agent representing the SAC agent. This does nothing in particular. - """ - pass diff --git a/l2rpn_baselines/SAC/SAC_NN.py b/l2rpn_baselines/SAC/SAC_NN.py deleted file mode 100644 index 172fb16..0000000 --- a/l2rpn_baselines/SAC/SAC_NN.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import numpy as np -import os -import tensorflow as tf - -# tf2.0 friendly -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=FutureWarning) - from tensorflow.keras.models import load_model, Sequential, Model - from tensorflow.keras.layers import Activation, Dense - from tensorflow.keras.layers import Input, Concatenate - -from l2rpn_baselines.utils import BaseDeepQ, TrainingParam - - -# This class implements the "Sof Actor Critic" model. -# It is a custom implementation, courtesy to Clement Goubet -# The original paper is: https://arxiv.org/abs/1801.01290 -class SAC_NN(BaseDeepQ): - """ - Constructs the desired soft actor critic network. 
- - Compared to other baselines shown elsewhere (*eg* :class:`l2rpn_baselines.DeepQSimple` or - :class:`l2rpn_baselines.DeepQSimple`) the implementation of the SAC is a bit more tricky. - - However, we demonstrate here that the use of :class:`l2rpn_baselines.utils.BaseDeepQ` with custom - parameters class (in this calse :class:`SAC_NNParam` is flexible enough to meet our needs. - - References - ----------- - Original paper: - https://arxiv.org/abs/1801.01290 - - modified for discrete action space: - https://arxiv.org/abs/1910.07207 - """ - def __init__(self, - nn_params, - training_param=None, - verbose=False): - if training_param is None: - training_param = TrainingParam() - BaseDeepQ.__init__(self, - nn_params, - training_param, - verbose=verbose) - - # TODO add as meta param the number of "Q" you want to use (here 2) - # TODO add as meta param size and types of the networks - self.average_reward = 0 - self.life_spent = 1 - self.qvalue_evolution = np.zeros((0,)) - self.Is_nan = False - - self.model_value_target = None - self.model_value = None - self.model_Q = None - self.model_Q2 = None - self.model_policy = None - - self.previous_size = 0 - self.previous_eyes = None - self.previous_arange = None - self.previous_size_train = 0 - self.previous_eyes_train = None - - # optimizers and learning rate - self.schedule_lr_policy = None - self.optimizer_policy = None - self.schedule_lr_Q = None - self.optimizer_Q = None - self.schedule_lr_Q2 = None - self.optimizer_Q2 = None - self.schedule_lr_value = None - self.optimizer_value = None - - self.construct_q_network() - - def _build_q_NN(self): - input_states = Input(shape=(self._observation_size,)) - input_action = Input(shape=(self._action_size,)) - - input_layer = Concatenate()([input_states, input_action]) - lay = input_layer - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes, self._nn_archi.activs)): - lay = Dense(size, name="layer_{}".format(lay_num))(lay) # put at self.action_size - lay = Activation(act)(lay) - - advantage = Dense(1, activation='linear')(lay) - - model = Model(inputs=[input_states, input_action], outputs=[advantage]) - return model - - def _build_model_value(self): - input_states = Input(shape=(self._observation_size,)) - - lay = input_states - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes_value, self._nn_archi.activs_value)): - lay = Dense(size)(lay) - lay = Activation(act)(lay) - - advantage = Dense(self._action_size, activation='relu')(lay) - state_value = Dense(1, activation='linear', name="state_value")(advantage) - model = Model(inputs=[input_states], outputs=[state_value]) - return model - - def construct_q_network(self): - """ - This constructs all the networks needed for the SAC agent. 
- """ - self.model_Q = self._build_q_NN() - self.schedule_lr_Q, self.optimizer_Q = self.make_optimiser() - self.model_Q.compile(loss='mse', optimizer=self.optimizer_Q) - - self.model_Q2 = self._build_q_NN() - self.schedule_lr_Q2, self.optimizer_Q2 = self.make_optimiser() - self.model_Q2.compile(loss='mse', optimizer=self.optimizer_Q2) - - # state value function approximation - self.model_value = self._build_model_value() - self.schedule_lr_value, self.optimizer_value = self.make_optimiser() - self._optimizer_model = self.optimizer_value - self.model_value.compile(loss='mse', optimizer=self.optimizer_value) - - self.model_value_target = self._build_model_value() - self.model_value_target.set_weights(self.model_value.get_weights()) - - # policy function approximation - self.model_policy = Sequential() - # proba of choosing action a depending on policy pi - input_states = Input(shape=(self._observation_size,)) - lay = input_states - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes_policy, self._nn_archi.activs_policy)): - lay = Dense(size)(lay) - lay = Activation(act)(lay) - soft_proba = Dense(self._action_size, activation="softmax", kernel_initializer='uniform', name="soft_proba")(lay) - self.model_policy = Model(inputs=[input_states], outputs=[soft_proba]) - self.schedule_lr_policy, self.optimizer_policy = self.make_optimiser() - self.model_policy.compile(loss='categorical_crossentropy', optimizer=self.optimizer_policy) - - def _get_eye_pm(self, batch_size): - if batch_size != self.previous_size: - tmp = np.zeros((batch_size, self._action_size), dtype=np.float32) - self.previous_eyes = tmp - self.previous_arange = np.arange(batch_size) - self.previous_size = batch_size - return self.previous_eyes, self.previous_arange - - def predict_movement(self, data, epsilon, batch_size=None, training=False): - """ - predict the next movements in a vectorized fashion - """ - if batch_size is None: - batch_size = data.shape[0] - rand_val = np.random.random(data.shape[0]) - p_actions = self.model_policy(data, training=training).numpy() - opt_policy_orig = np.argmax(np.abs(p_actions), axis=-1) - opt_policy = 1.0 * opt_policy_orig - opt_policy[rand_val < epsilon] = np.random.randint(0, self._action_size, size=(np.sum(rand_val < epsilon))) - opt_policy = opt_policy.astype(np.int) - idx = np.arange(batch_size) - return opt_policy, p_actions[idx, opt_policy], p_actions - - def _get_eye_train(self, batch_size): - if batch_size != self.previous_size_train: - self.previous_eyes_train = np.repeat(np.eye(self._action_size), - batch_size * np.ones(self._action_size, dtype=np.int), - axis=0) - self.previous_eyes_train = tf.convert_to_tensor(self.previous_eyes_train, dtype=tf.float32) - self.previous_size_train = batch_size - return self.previous_eyes_train - - def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None): - """Trains networks to fit given parameters""" - if batch_size is None: - batch_size = s_batch.shape[0] - target = np.zeros((batch_size, 1)) - - # training of the action state value networks - last_action = np.zeros((batch_size, self._action_size)) - - # Save the graph just the first time - if tf_writer is not None: - tf.summary.trace_on() - # TODO is it s2 or s ? For me it should be s... 
- fut_action = self.model_value_target(s2_batch, training=True).numpy().reshape(-1) - # TODO ***_target should be for the Q function instead imho - - if tf_writer is not None: - with tf_writer.as_default(): - tf.summary.trace_export("model_value_target-graph", 0) - tf.summary.trace_off() - - # TODO is it rather `targets[:, a_batch]` - target[:, 0] = r_batch + (1 - d_batch) * self._training_param.discount_factor * fut_action - # target[:, a_batch] = r_batch + (1 - d_batch) * self._training_param.discount_factor * fut_action - loss = self.model_Q.train_on_batch([s_batch, last_action], target) - loss_2 = self.model_Q2.train_on_batch([s_batch, last_action], target) - - self.life_spent += 1 - temp = 1 / np.log(self.life_spent) / 2 - tiled_batch = np.tile(s_batch, (self._action_size, 1)) - tiled_batch_ts = tf.convert_to_tensor(tiled_batch) - # tiled_batch: output something like: batch, batch, batch - # TODO save that somewhere not to compute it each time, you can even save this in the - # TODO tensorflow graph! - tmp = self._get_eye_train(batch_size) - - action_v1_orig = self.model_Q.predict([tiled_batch_ts, tmp], batch_size=batch_size).reshape(batch_size, -1) - action_v2_orig = self.model_Q2.predict([tiled_batch_ts, tmp], batch_size=batch_size).reshape(batch_size, -1) - action_v1 = action_v1_orig - np.amax(action_v1_orig, axis=-1).reshape(batch_size, 1) - new_proba = np.exp(action_v1 / temp) / np.sum(np.exp(action_v1 / temp), axis=-1).reshape(batch_size, 1) - new_proba_ts = tf.convert_to_tensor(new_proba) - loss_policy = self.model_policy.train_on_batch(s_batch, new_proba_ts) - - target_pi = self.model_policy.predict(s_batch, batch_size=batch_size) - value_target = np.fmin(action_v1_orig[0, a_batch], action_v2_orig[0, a_batch]) - np.sum( - target_pi * np.log(target_pi + 1e-6)) - value_target_ts = tf.convert_to_tensor(value_target.reshape(-1, 1)) - loss_value = self.model_value.train_on_batch(s_batch, value_target_ts) - - self.Is_nan = np.isnan(loss) + np.isnan(loss_2) + np.isnan(loss_policy) + np.isnan(loss_value) - return np.all(np.isfinite(loss)) & np.all(np.isfinite(loss_2)) & np.all(np.isfinite(loss_policy)) & \ - np.all(np.isfinite(loss_value)) - - @staticmethod - def _get_path_model(path, name=None): - if name is None: - path_model = path - else: - path_model = os.path.join(path, name) - path_target_model = "{}_target".format(path_model) - path_modelQ = "{}_Q".format(path_model) - path_modelQ2 = "{}_Q2".format(path_model) - path_policy = "{}_policy".format(path_model) - return path_model, path_target_model, path_modelQ, path_modelQ2, path_policy - - def save_network(self, path, name=None, ext="h5"): - """ - Saves all the models with unique names - """ - path_model, path_target_model, path_modelQ, path_modelQ2, path_policy = self._get_path_model(path, name) - self.model_value.save('{}.{}'.format(path_model, ext)) - self.model_value_target.save('{}.{}'.format(path_target_model, ext)) - self.model_Q.save('{}.{}'.format(path_modelQ, ext)) - self.model_Q2.save('{}.{}'.format(path_modelQ2, ext)) - self.model_policy.save('{}.{}'.format(path_policy, ext)) - - def load_network(self, path, name=None, ext="h5"): - """ - We load all the models using the keras "load_model" function. 
- """ - path_model, path_target_model, path_modelQ, path_modelQ2, path_policy = self._get_path_model(path, name) - self.construct_q_network() - self.model_value.load_weights('{}.{}'.format(path_model, ext)) - self.model_value_target.load_weights('{}.{}'.format(path_target_model, ext)) - self.model_Q.load_weights('{}.{}'.format(path_modelQ, ext)) - self.model_Q2.load_weights('{}.{}'.format(path_modelQ2, ext)) - self.model_policy.load_weights('{}.{}'.format(path_policy, ext)) - if self.verbose: - print("Succesfully loaded network.") - - def target_train(self): - """ - This update the target model. - """ - model_weights = self.model_value.get_weights() - target_model_weights = self.model_value_target.get_weights() - for i in range(len(model_weights)): - target_model_weights[i] = self._training_param.tau * model_weights[i] + (1 - self._training_param.tau) * \ - target_model_weights[i] - self.model_value_target.set_weights(model_weights) diff --git a/l2rpn_baselines/SAC/SAC_NNParam.py b/l2rpn_baselines/SAC/SAC_NNParam.py deleted file mode 100644 index 7e008ac..0000000 --- a/l2rpn_baselines/SAC/SAC_NNParam.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. -import copy - -from l2rpn_baselines.utils import NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN - - -class SAC_NNParam(NNParam): - """ - - Attributes - ---------- - sizes_value: ``list`` - List of integer, each one representing the size of the hidden layer for the "value" neural network. - - activs_value: ``list`` - List of ``str`` for each hidden layer of the "value" neural network, indicates which hidden layer to use - - sizes_policy: ``list`` - List of integers, each reprenseting the size of the hidden layer for the "policy" network. 
- - activs_policy: ``list`` - List of ``str``: The activation functions (for each layer) of the policy network - - """ - _int_attr = copy.deepcopy(NNParam._int_attr) - _float_attr = copy.deepcopy(NNParam._float_attr) - _str_attr = copy.deepcopy(NNParam._str_attr) - _list_float = copy.deepcopy(NNParam._list_float) - _list_str = copy.deepcopy(NNParam._list_str) - _list_int = copy.deepcopy(NNParam._list_int) - - _list_str += ["activs_value", "activs_policy"] - _list_int += ["sizes_value", "sizes_policy"] - - nn_class = SAC_NN - - def __init__(self, - action_size, - observation_size, # TODO this might not be usefull - sizes, - activs, - list_attr_obs, - sizes_value, - activs_value, - sizes_policy, - activs_policy - ): - NNParam.__init__(self, - action_size, - observation_size, # TODO this might not be usefull - sizes, - activs, - list_attr_obs - ) - self.sizes_value = sizes_value - self.activs_value = activs_value - self.sizes_policy = sizes_policy - self.activs_policy = activs_policy diff --git a/l2rpn_baselines/SAC/__init__.py b/l2rpn_baselines/SAC/__init__.py deleted file mode 100644 index 8ca58cf..0000000 --- a/l2rpn_baselines/SAC/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = [ - "SAC", - "evaluate", - "train", - "SAC_NNParam" -] - -from l2rpn_baselines.SAC.SAC import SAC -from l2rpn_baselines.SAC.evaluate import evaluate -from l2rpn_baselines.SAC.train import train -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam diff --git a/l2rpn_baselines/SAC/evaluate.py b/l2rpn_baselines/SAC/evaluate.py deleted file mode 100644 index 3296245..0000000 --- a/l2rpn_baselines/SAC/evaluate.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import os -import tensorflow as tf - -from grid2op.MakeEnv import make -from grid2op.Runner import Runner -from grid2op.Reward import * -from grid2op.Action import * - -from l2rpn_baselines.utils.save_log_gif import save_log_gif -from l2rpn_baselines.SAC.SAC import SAC, DEFAULT_NAME -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN - -DEFAULT_LOGS_DIR = "./logs-eval/do-nothing-baseline" -DEFAULT_NB_EPISODE = 1 -DEFAULT_NB_PROCESS = 1 -DEFAULT_MAX_STEPS = -1 - - -def evaluate(env, - name=DEFAULT_NAME, - load_path=None, - logs_path=DEFAULT_LOGS_DIR, - nb_episode=DEFAULT_NB_EPISODE, - nb_process=DEFAULT_NB_PROCESS, - max_steps=DEFAULT_MAX_STEPS, - verbose=False, - save_gif=False): - """ - How to evaluate the performances of the trained SAC agent. - - Parameters - ---------- - env: :class:`grid2op.Environment` - The environment on which you evaluate your agent. - - name: ``str`` - The name of the trained baseline - - load_path: ``str`` - Path where the agent has been stored - - logs_path: ``str`` - Where to write the results of the assessment - - nb_episode: ``str`` - How many episodes to run during the assessment of the performances - - nb_process: ``int`` - On how many process the assessment will be made. 
(setting this > 1 can lead to some speed ups but can be - unstable on some plaform) - - max_steps: ``int`` - How many steps at maximum your agent will be assessed - - verbose: ``bool`` - Currently un used - - save_gif: ``bool`` - Whether or not you want to save, as a gif, the performance of your agent. It might cause memory issues (might - take a lot of ram) and drastically increase computation time. - - Returns - ------- - agent: :class:`l2rpn_baselines.utils.DeepQAgent` - The loaded agent that has been evaluated thanks to the runner. - - res: ``list`` - The results of the Runner on which the agent was tested. - - - Examples - ------- - You can evaluate a DeepQSimple this way: - - .. code-block:: python - - from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward - from l2rpn_baselines.SAC import eval - - # Create dataset env - env = make("l2rpn_case14_sandbox", - reward_class=L2RPNSandBoxScore, - other_rewards={ - "reward": L2RPNReward - }) - - # Call evaluation interface - evaluate(env, - name="MyAwesomeAgent", - load_path="/WHERE/I/SAVED/THE/MODEL", - logs_path=None, - nb_episode=10, - nb_process=1, - max_steps=-1, - verbose=False, - save_gif=False) - """ - - # Limit gpu usage - physical_devices = tf.config.list_physical_devices('GPU') - if len(physical_devices): - tf.config.experimental.set_memory_growth(physical_devices[0], True) - - runner_params = env.get_params_for_runner() - runner_params["verbose"] = verbose - - if load_path is None: - raise RuntimeError("Cannot evaluate a model if there is nothing to be loaded.") - path_model, path_target_model = SAC_NN.get_path_model(load_path, name) - nn_archi = SAC_NNParam.from_json(os.path.join(path_model, "nn_architecture.json")) - - # Run - # Create agent - agent = SAC(action_space=env.action_space, - name=name, - store_action=nb_process == 1, - nn_archi=nn_archi, - observation_space=env.observation_space) - - # Load weights from file - agent.load(load_path) - - # Print model summary - stringlist = [] - agent.deep_q.model_value.summary(print_fn=lambda x: stringlist.append(x)) - short_model_summary = "\n".join(stringlist) - - if verbose: - print("Value model: {}".format(short_model_summary)) - - # Build runner - runner = Runner(**runner_params, - agentClass=None, - agentInstance=agent) - - # Run - os.makedirs(logs_path, exist_ok=True) - res = runner.run(path_save=logs_path, - nb_episode=nb_episode, - nb_process=nb_process, - max_iter=max_steps, - pbar=verbose) - - # Print summary - - if verbose: - print("Evaluation summary:") - for _, chron_name, cum_reward, nb_time_step, max_ts in res: - msg_tmp = "chronics at: {}".format(chron_name) - msg_tmp += "\ttotal score: {:.6f}".format(cum_reward) - msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts) - print(msg_tmp) - - if len(agent.dict_action): - # I output some of the actions played - print("The agent played {} different action".format(len(agent.dict_action))) - for id_, (nb, act, types) in agent.dict_action.items(): - print("Action with ID {} was played {} times".format(id_, nb)) - print("{}".format(act)) - print("-----------") - - if save_gif: - if verbose: - print("Saving the gif of the episodes") - save_log_gif(logs_path, res) - - return agent, res - - -if __name__ == "__main__": - from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward - from l2rpn_baselines.utils import cli_eval - - # Parse command line - args = cli_eval().parse_args() - - # Create dataset env - env = make(args.env_name, - reward_class=L2RPNSandBoxScore, - other_rewards={ - "reward": L2RPNReward - }) 
- - # Call evaluation interface - evaluate(env, - name=args.name, - load_path=os.path.abspath(args.load_path), - logs_path=args.logs_dir, - nb_episode=args.nb_episode, - nb_process=args.nb_process, - max_steps=args.max_steps, - verbose=args.verbose, - save_gif=args.save_gif) diff --git a/l2rpn_baselines/SAC/train.py b/l2rpn_baselines/SAC/train.py deleted file mode 100755 index da44016..0000000 --- a/l2rpn_baselines/SAC/train.py +++ /dev/null @@ -1,347 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import os -import tensorflow as tf -import warnings - -from l2rpn_baselines.utils import cli_train -from l2rpn_baselines.SAC.SAC import SAC, DEFAULT_NAME -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN -from l2rpn_baselines.utils import TrainingParam -from l2rpn_baselines.utils.waring_msgs import _WARN_GPU_MEMORY - - -def train(env, - name=DEFAULT_NAME, - iterations=1, - save_path=None, - load_path=None, - logs_dir=None, - training_param=None, - filter_action_fun=None, - verbose=True, - kwargs_converters={}, - kwargs_archi={}): - """ - This function implements the "training" part of the balines "DeepQSimple". - - Parameters - ---------- - env: :class:`grid2op.Environment` - Then environment on which you need to train your agent. - - name: ``str``` - The name of your agent. - - iterations: ``int`` - For how many iterations (steps) do you want to train your agent. NB these are not episode, these are steps. - - save_path: ``str`` - Where do you want to save your baseline. - - load_path: ``str`` - If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded - some of the argument provided to this function will not be used. - - logs_dir: ``str`` - Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. - - verbose: ``bool`` - If you want something to be printed on the terminal (a better logging strategy will be put at some point) - - training_param: :class:`l2rpn_baselines.utils.TrainingParam` - The parameters describing the way you will train your model. - - filter_action_fun: ``function`` - A function to filter the action space. See - `IdToAct.filter_action `_ - documentation. - - kwargs_converters: ``dict`` - A dictionary containing the key-word arguments pass at this initialization of the - :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent. - - kwargs_archi: ``dict`` - Key word arguments used for making the :class:`DeepQ_NNParam` object that will be used to build the baseline. - - Returns - ------- - - baseline: :class:`DeepQSimple` - The trained baseline. - - - .. _Example-sac: - - Examples - --------- - Here is an example on how to train a SAC baseline. - - First define a python script, for example - - .. 
code-block:: python - - import grid2op - from grid2op.Reward import L2RPNReward - from l2rpn_baselines.utils import TrainingParam, NNParam - from l2rpn_baselines.SAC import train - - # define the environment - env = grid2op.make("l2rpn_case14_sandbox", - reward_class=L2RPNReward) - - # use the default training parameters - tp = TrainingParam() - - # this will be the list of what part of the observation I want to keep - # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] - - # neural network architecture - observation_size = NNParam.get_obs_size(env, li_attr_obs_X) - sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - sizes_v = [800, 800] # sizes of each hidden layers - sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - # select some part of the action - # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False - } - # define the name of the model - nm_ = "AnneOnymous" - try: - train(env, - name=nm_, - iterations=10000, - save_path="/WHERE/I/SAVED/THE/MODEL", - load_path=None, - logs_dir="/WHERE/I/SAVED/THE/LOGS", - training_param=tp, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - finally: - env.close() - - """ - - # Limit gpu usage - try: - physical_devices = tf.config.list_physical_devices('GPU') - if len(physical_devices) > 0: - tf.config.experimental.set_memory_growth(physical_devices[0], True) - except AttributeError: - # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p - try: - physical_devices = tf.config.experimental.list_physical_devices('GPU') - if len(physical_devices) > 0: - tf.config.experimental.set_memory_growth(physical_devices[0], True) - except Exception: - warnings.warn(_WARN_GPU_MEMORY) - except Exception: - warnings.warn(_WARN_GPU_MEMORY) - - if training_param is None: - training_param = TrainingParam() - - # compute the proper size for the converter - kwargs_archi["action_size"] = SAC.get_action_size(env.action_space, filter_action_fun, kwargs_converters) - - if load_path is not None: - path_model, path_target_model = SAC_NN.get_path_model(load_path, name) - if verbose: - print("INFO: Reloading a model, the architecture parameters provided will be ignored") - nn_archi = SAC_NNParam.from_json(os.path.join(path_model, "nn_architecture.json")) - else: - nn_archi = SAC_NNParam(**kwargs_archi) - - baseline = SAC(action_space=env.action_space, - nn_archi=nn_archi, - name=name, - istraining=True, - verbose=verbose, - **kwargs_converters - ) - - if load_path is not None: - if verbose: - print("INFO: Reloading a model, training parameters will be ignored") - baseline.load(load_path) - training_param = 
baseline._training_param - - baseline.train(env, - iterations, - save_path=save_path, - logdir=logs_dir, - training_param=training_param) - # as in our example (and in our explanation) we recommend to save the mode regurlarly in the "train" function - # it is not necessary to save it again here. But if you chose not to follow these advice, it is more than - # recommended to save the "baseline" at the end of this function with: - # baseline.save(path_save) - - -if __name__ == "__main__": - # import grid2op - import numpy as np - from grid2op.Parameters import Parameters - from grid2op import make - from grid2op.Reward import L2RPNReward - import re - try: - from lightsim2grid.LightSimBackend import LightSimBackend - backend = LightSimBackend() - except: - from grid2op.Backend import PandaPowerBackend - backend = PandaPowerBackend() - - args = cli_train().parse_args() - - # is it highly recommended to modify the reward depening on the algorithm. - # for example here i will push my algorithm to learn that plyaing illegal or ambiguous action is bad - class MyReward(L2RPNReward): - def initialize(self, env): - self.reward_min = 0.0 - self.reward_max = 1.0 - - def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): - if has_error or is_illegal or is_ambiguous: - # previous action was bad - res = self.reward_min - elif is_done: - # really strong reward if an episode is over without game over - res = self.reward_max - else: - res = super().__call__(action, env, has_error, is_done, is_illegal, is_ambiguous) - res /= env.n_line - if not np.isfinite(res): - res = self.reward_min - return res - - # Use custom params - - # Create grid2op game environement - env_init = None - try: - from grid2op.Chronics import MultifolderWithCache - except: - from grid2op.Chronics import MultiFolder - MultifolderWithCache = MultiFolder - - game_param = Parameters() - game_param.NB_TIMESTEP_COOLDOWN_SUB = 2 - game_param.NB_TIMESTEP_COOLDOWN_LINE = 2 - env = make(args.env_name, - param=game_param, - reward_class=MyReward, - backend=backend, - chronics_class=MultifolderWithCache - ) - # env.chronics_handler.set_max_iter(7*288) - try: - env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((03)|(72)|(57))$", x) is not None) - env.chronics_handler.real_data.reset() - except RuntimeError as exc_: - raise exc_ - except AttributeError as exc_: - # not available in all grid2op version - pass - # env.chronics_handler.real_data. - env_init = env - if args.nb_env > 1: - from l2rpn_baselines.utils import make_multi_env - env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env)) - - tp = TrainingParam() - - # NN training - tp.lr = 1e-4 - tp.lr_decay_steps = 30000 - tp.minibatch_size = 256 - tp.update_freq = 128 - - # limit the number of time steps played per scenarios - tp.step_increase_nb_iter = 100 # None to deactivate it - tp.min_iter = 10 - tp.update_nb_iter = 100 # once 100 scenarios are solved, increase of "step_increase_nb_iter" - - # oversampling hard scenarios - tp.oversampling_rate = 3 - - # experience replay - tp.buffer_size = 1000000 - - # e greedy - tp.min_observation = 10000 - tp.initial_epsilon = 0.4 - tp.final_epsilon = 1./(2*7*288.) - tp.step_for_final_epsilon = int(1e5) - - # don't start always at the same hour (if not None) otherwise random sampling, see docs - tp.random_sample_datetime_start = None - - # saving, logging etc. 
- tp.save_model_each = 10000 - tp.update_tensorboard_freq = 256 - - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] - - # nn architecture - observation_size = SAC_NNParam.get_obs_size(env_init, li_attr_obs_X) - sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - sizes_v = [800, 800] # sizes of each hidden layers - sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - # which actions i keep - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False, - } - nm_ = args.name if args.name is not None else DEFAULT_NAME - try: - train(env, - name=nm_, - iterations=args.num_train_steps, - save_path=args.save_path, - load_path=args.load_path, - logs_dir=args.logs_dir, - training_param=tp, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - finally: - env.close() - if args.nb_env > 1: - env_init.close() diff --git a/l2rpn_baselines/SACOld/SACOld.py b/l2rpn_baselines/SACOld/SACOld.py index ab5ad39..0f28e82 100644 --- a/l2rpn_baselines/SACOld/SACOld.py +++ b/l2rpn_baselines/SACOld/SACOld.py @@ -7,7 +7,7 @@ # This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. 
from l2rpn_baselines.utils import DeepQAgent -from l2rpn_baselines.SAC.SAC_NN import SAC_NN +from l2rpn_baselines.SACOld.SACOld_NN import SACOld_NN DEFAULT_NAME = "SACOld" diff --git a/l2rpn_baselines/__init__.py b/l2rpn_baselines/__init__.py index 54668e5..b12a686 100644 --- a/l2rpn_baselines/__init__.py +++ b/l2rpn_baselines/__init__.py @@ -6,7 +6,6 @@ "SliceRDQN", "DeepQSimple", "DuelQSimple", - "SAC", "LeapNetEncoded", # Backward compatibility "SACOld", @@ -18,4 +17,4 @@ "utils" ] -__version__ = "0.4.4" +__version__ = "0.5.0" diff --git a/l2rpn_baselines/test/test_import.py b/l2rpn_baselines/test/test_import.py index ba16d6b..4c0456c 100644 --- a/l2rpn_baselines/test/test_import.py +++ b/l2rpn_baselines/test/test_import.py @@ -45,11 +45,6 @@ def load_module(self): return "DeepQSimple" -class TestSAC(TestImport, unittest.TestCase): - def load_module(self): - return "SAC" - - class TestSACOld(TestImport, unittest.TestCase): def load_module(self): return "SACOld" diff --git a/l2rpn_baselines/test/test_train_eval.py b/l2rpn_baselines/test/test_train_eval.py index b9f91a5..abd6998 100644 --- a/l2rpn_baselines/test/test_train_eval.py +++ b/l2rpn_baselines/test/test_train_eval.py @@ -25,8 +25,6 @@ from l2rpn_baselines.DuelQSimple import evaluate as eval_d3qs from l2rpn_baselines.SACOld import train as train_sacold from l2rpn_baselines.SACOld import evaluate as eval_sacold -from l2rpn_baselines.SAC import train as train_sac -from l2rpn_baselines.SAC import evaluate as eval_sac from l2rpn_baselines.DuelQLeapNet import train as train_leap from l2rpn_baselines.DuelQLeapNet import evaluate as eval_leap from l2rpn_baselines.LeapNetEncoded import train as train_leapenc @@ -343,62 +341,6 @@ def test_train_eval(self): save_gif=False) -class TestSAC(unittest.TestCase): - def test_train_eval(self): - tp = TrainingParam() - tp.buffer_size = 100 - tp.minibatch_size = 8 - tp.update_freq = 32 - tp.min_observation = 32 - tmp_dir = tempfile.mkdtemp() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["prod_p", "load_p", "rho"] - - # neural network architecture - observation_size = NNParam.get_obs_size(env, li_attr_obs_X) - sizes_q = [100, 50, 10] # sizes of each hidden layers - sizes_v = [100, 100] # sizes of each hidden layers - sizes_pol = [100, 10] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False - } - nm_ = "AnneOnymous" - train_sac(env, - name=nm_, - iterations=100, - save_path=tmp_dir, - load_path=None, - logs_dir=tmp_dir, - training_param=tp, - verbose=False, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - - baseline_2 = eval_sac(env, - name=nm_, - load_path=tmp_dir, - logs_path=tmp_dir, - nb_episode=1, - nb_process=1, - max_steps=30, - verbose=False, - save_gif=False) - - class TestLeapNet(unittest.TestCase): def test_train_eval(self): tp = TrainingParam() diff --git a/setup.py b/setup.py index dcaa052..4d90368 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ import setuptools from setuptools import setup -__version__ = "0.4.4" +__version__ = "0.5.0" 
 pkgs = {

From f0403bfaef75867b7713957c5df475d9ec175a90 Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Tue, 18 Aug 2020 15:11:45 +0200
Subject: [PATCH 3/3] updating the changelog

---
 CHANGELOG.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 32cf9de..e7441f2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,8 +4,10 @@ Change Log
 --------
 - stack multiple states in `utils/DeepQAgent`
 
-[0.5.0] - 2020-08-??
+[0.5.0] - 2020-08-18
 --------------------
+- [BREAKING] remove the SAC baseline that was not correct. For backward compatibility, its code
+  can still be accessed with SACOld
 - [FIXED] the counting of the action types frequency in tensorboard (for some baselines)
 - [FIXED] a broken Replay buffer `utils.ReplayBuffer` (used in some baselines)
 - [FIXED] a bug in using multiple environments for some baselines
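The [BREAKING] entry above points users of the removed SAC baseline to SACOld. A minimal migration sketch, using only the import paths that appear in the test suite kept by this series (the full public API of the SACOld package is assumed to mirror the old SAC one):

.. code-block:: python

    # hypothetical migration snippet: replace imports from `l2rpn_baselines.SAC`
    # with the backward-compatible `l2rpn_baselines.SACOld` package
    from l2rpn_baselines.SACOld import train as train_sacold
    from l2rpn_baselines.SACOld import evaluate as eval_sacold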