From 24b26439fcedd0ff787c46dde861c32a9be27460 Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Wed, 12 Aug 2020 08:21:57 +0200
Subject: [PATCH 1/3] some fixes for the leap net encoded

---
 .../LeapNetEncoded/LeapNetEncoded_NN.py       |  2 +-
 .../LeapNetEncoded/LeapNetEncoded_NNParam.py  |  9 +++
 l2rpn_baselines/LeapNetEncoded/study.py       |  8 +--
 l2rpn_baselines/LeapNetEncoded/train.py       |  5 +-
 l2rpn_baselines/utils/NNParam.py              | 70 ++++++++++---------
 5 files changed, 54 insertions(+), 40 deletions(-)

diff --git a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
index e30a0bc..05f75c6 100644
--- a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
+++ b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NN.py
@@ -243,7 +243,7 @@ def predict_movement(self, data, epsilon, batch_size=None, training=False):
         if batch_size is None:
             batch_size = data.shape[0]
         data_nn, true_output_grid = self._make_x_tau(data)
-        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=False)
+        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=training)
         return res
 
     def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None):
diff --git a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
index f669770..ee4a300 100644
--- a/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
+++ b/l2rpn_baselines/LeapNetEncoded/LeapNetEncoded_NNParam.py
@@ -175,3 +175,12 @@ def center_reduce(self, env):
         self._center_reduce_vect(env.get_obs(), "tau")
         self._center_reduce_vect(env.get_obs(), "gm_out")
         self._center_reduce_vect(env.get_obs(), "input_q")
+
+    def _get_adds_mults_from_name(self, obs, attr_nm):
+        add_tmp, mult_tmp = super()._get_adds_mults_from_name(obs, attr_nm)
+        if attr_nm in ["line_status"]:
+            # transform line_status into (1. - line_status) [similar to the leap net papers]
+            # 0 powerline is connected, 1 powerline is NOT connected
+            add_tmp = -1.0
+            mult_tmp = -1.0
+        return add_tmp, mult_tmp
diff --git a/l2rpn_baselines/LeapNetEncoded/study.py b/l2rpn_baselines/LeapNetEncoded/study.py
index fe7a958..9dece75 100644
--- a/l2rpn_baselines/LeapNetEncoded/study.py
+++ b/l2rpn_baselines/LeapNetEncoded/study.py
@@ -56,10 +56,10 @@ def study(env,
     # Run
     # Create agent
     agent = LeapNetEncoded(action_space=env.action_space,
-                         name=name,
-                         store_action=nb_process == 1,
-                         nn_archi=nn_archi,
-                         observation_space=env.observation_space)
+                           name=name,
+                           store_action=nb_process == 1,
+                           nn_archi=nn_archi,
+                           observation_space=env.observation_space)
 
     # Load weights from file
     agent.load(load_path)
diff --git a/l2rpn_baselines/LeapNetEncoded/train.py b/l2rpn_baselines/LeapNetEncoded/train.py
index 5d74e13..9ca8177 100755
--- a/l2rpn_baselines/LeapNetEncoded/train.py
+++ b/l2rpn_baselines/LeapNetEncoded/train.py
@@ -389,7 +389,8 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                      "day_of_week",
                      "hour_of_day",
                      "minute_of_hour",
-                     "rho"]
+                     "rho",
+                     ]
 
     li_attr_obs_Tau = ["line_status", "timestep_overflow"]
     list_attr_gm_out = ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q", "load_v"] + li_attr_obs_X
@@ -405,7 +406,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
 
                     'dim_topo': env_init.dim_topo,
 
-                    "sizes_enc": (50, 50, 50, 50),
+                    "sizes_enc": (50, 50, ),
                     "sizes_main": (300, 300, 300),
                     "sizes_out_gm": (100, ),
                     "sizes_Qnet": (200, 200, 200)
diff --git a/l2rpn_baselines/utils/NNParam.py b/l2rpn_baselines/utils/NNParam.py
index 8294d9a..3b9e350 100644
--- a/l2rpn_baselines/utils/NNParam.py
+++ b/l2rpn_baselines/utils/NNParam.py
@@ -224,6 +224,42 @@ def center_reduce(self, env):
         # TODO see TestLeapNet for this feature
         self._center_reduce_vect(env.get_obs(), "x")
 
+    def _get_adds_mults_from_name(self, obs, attr_nm):
+        if attr_nm in ["prod_p"]:
+            add_tmp = np.array([-0.5 * (pmax + pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+            mult_tmp = np.array([1. / max((pmax - pmin), 0.) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+        elif attr_nm in ["prod_q"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / max(abs(val), 1.0) for val in obs.prod_q])
+        elif attr_nm in ["load_p", "load_q"]:
+            add_tmp = np.array([-val for val in getattr(obs, attr_nm)])
+            mult_tmp = 0.5
+        elif attr_nm in ["load_v", "prod_v", "v_or", "v_ex"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / val for val in getattr(obs, attr_nm)])
+        elif attr_nm == "hour_of_day":
+            add_tmp = -12.
+            mult_tmp = 1.0 / 12
+        elif attr_nm == "minute_of_hour":
+            add_tmp = -30.
+            mult_tmp = 1.0 / 30
+        elif attr_nm == "day_of_week":
+            add_tmp = -4.
+            mult_tmp = 1.0 / 4
+        elif attr_nm == "day":
+            add_tmp = -15.
+            mult_tmp = 1.0 / 15.
+        elif attr_nm in ["target_dispatch", "actual_dispatch"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1. / (pmax - pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)])
+        elif attr_nm in ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex"]:
+            add_tmp = 0.
+            mult_tmp = np.array([1.0 / max(val, 1.0) for val in getattr(obs, attr_nm)])
+        else:
+            add_tmp = 0.
+ mult_tmp = 1.0 + return add_tmp, mult_tmp + def _center_reduce_vect(self, obs, nn_part): """ compute the xxxx_adds and xxxx_mults for one part of the neural network called nn_part, @@ -238,39 +274,7 @@ def _center_reduce_vect(self, obs, nn_part): adds = [] mults = [] for attr_nm in li_attr_obs: - if attr_nm in ["prod_p"]: - add_tmp = np.array([-0.5*(pmax + pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - mult_tmp = np.array([1./max((pmax - pmin), 0.) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - elif attr_nm in ["prod_q"]: - add_tmp = 0. - mult_tmp = np.array([1./max(abs(val), 1.0) for val in obs.prod_q]) - elif attr_nm in ["load_p", "load_q"]: - add_tmp = np.array([-val for val in getattr(obs, attr_nm)]) - mult_tmp = 0.5 - elif attr_nm in ["load_v", "prod_v", "v_or", "v_ex"]: - add_tmp = 0. - mult_tmp = np.array([1. / val for val in getattr(obs, attr_nm)]) - elif attr_nm == "hour_of_day": - add_tmp = -12. - mult_tmp = 1.0/12 - elif attr_nm == "minute_of_hour": - add_tmp = -30. - mult_tmp = 1.0/30 - elif attr_nm == "day_of_week": - add_tmp = -4. - mult_tmp = 1.0/4 - elif attr_nm == "day": - add_tmp = -15. - mult_tmp = 1.0/15. - elif attr_nm in ["target_dispatch", "actual_dispatch"]: - add_tmp = 0. - mult_tmp = np.array([1./(pmax - pmin) for pmin, pmax in zip(obs.gen_pmin, obs.gen_pmax)]) - elif attr_nm in ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex"]: - add_tmp = 0. - mult_tmp = np.array([1.0 / max(val, 1.0) for val in getattr(obs, attr_nm)]) - else: - add_tmp = 0. - mult_tmp = 1.0 + add_tmp, mult_tmp = self._get_adds_mults_from_name(obs, attr_nm) mults.append(mult_tmp) adds.append(add_tmp) setattr(self, "{}_adds".format(nn_part), adds) From 84d1e5df9e72fa584f5f37c19e3221f0ad2f1e18 Mon Sep 17 00:00:00 2001 From: BDonnot Date: Tue, 18 Aug 2020 15:10:06 +0200 Subject: [PATCH 2/3] removing the deprecated SAC baselines --- docs/SAC.rst | 44 --- docs/conf.py | 4 +- docs/index.rst | 1 - l2rpn_baselines/SAC/SAC.py | 18 -- l2rpn_baselines/SAC/SAC_NN.py | 281 ------------------- l2rpn_baselines/SAC/SAC_NNParam.py | 65 ----- l2rpn_baselines/SAC/__init__.py | 11 - l2rpn_baselines/SAC/evaluate.py | 204 -------------- l2rpn_baselines/SAC/train.py | 347 ------------------------ l2rpn_baselines/SACOld/SACOld.py | 2 +- l2rpn_baselines/__init__.py | 3 +- l2rpn_baselines/test/test_import.py | 5 - l2rpn_baselines/test/test_train_eval.py | 58 ---- setup.py | 2 +- 14 files changed, 5 insertions(+), 1040 deletions(-) delete mode 100644 docs/SAC.rst delete mode 100644 l2rpn_baselines/SAC/SAC.py delete mode 100644 l2rpn_baselines/SAC/SAC_NN.py delete mode 100644 l2rpn_baselines/SAC/SAC_NNParam.py delete mode 100644 l2rpn_baselines/SAC/__init__.py delete mode 100644 l2rpn_baselines/SAC/evaluate.py delete mode 100755 l2rpn_baselines/SAC/train.py diff --git a/docs/SAC.rst b/docs/SAC.rst deleted file mode 100644 index 688dc0f..0000000 --- a/docs/SAC.rst +++ /dev/null @@ -1,44 +0,0 @@ -SAC: Soft Actor Critic -========================= - -This baseline comes from the paper: -`Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor `_ - -**NB** This version is a new implementation of the SAC baselines. We recommend you to start using -it in new projects. The old version had some issues. Out of backward compatibility, it is still -available under the name "SACOld". - -Description ------------ -This module proposes an implementation of the SAC algorithm. - -An example to train this model is available in the train function :ref:`Example-sac`. 
- -Exported class --------------- -You can use this class with: - -.. code-block:: python - - from l2rpn_baselines.SAC import train, evaluate, SAC - -.. automodule:: l2rpn_baselines.SAC - :members: - :autosummary: - -Other non exported class ------------------------- -These classes need to be imported, if you want to import them with (non exhaustive list): -.. code-block:: python - - from l2rpn_baselines.SAC.SAC_NN import SAC_NN - from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam - - -.. autoclass:: l2rpn_baselines.SAC.SAC_NN.SAC_NN - :members: - :autosummary: - -.. autoclass:: l2rpn_baselines.SAC.SAC_NNParam.SAC_NNParam - :members: - :autosummary: diff --git a/docs/conf.py b/docs/conf.py index 113d3f2..5e36f9a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,8 +22,8 @@ author = 'Benjamin DONNOT' # The full version, including alpha/beta/rc tags -release = '0.4.4' -version = '0.4' +release = '0.5.0' +version = '0.5' # -- General configuration --------------------------------------------------- diff --git a/docs/index.rst b/docs/index.rst index cc7641c..c2c53d5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -28,7 +28,6 @@ Baseline already Available DeepQSimple DoubleDuelingDQN DuelQSimple - SAC More advanced baselines diff --git a/l2rpn_baselines/SAC/SAC.py b/l2rpn_baselines/SAC/SAC.py deleted file mode 100644 index f3619d0..0000000 --- a/l2rpn_baselines/SAC/SAC.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -from l2rpn_baselines.utils import DeepQAgent -from l2rpn_baselines.SAC.SAC_NN import SAC_NN -DEFAULT_NAME = "SAC" - - -class SAC(DeepQAgent): - """ - This is the :class:`l2rpn_baselines.utils` agent representing the SAC agent. This does nothing in particular. - """ - pass diff --git a/l2rpn_baselines/SAC/SAC_NN.py b/l2rpn_baselines/SAC/SAC_NN.py deleted file mode 100644 index 172fb16..0000000 --- a/l2rpn_baselines/SAC/SAC_NN.py +++ /dev/null @@ -1,281 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import numpy as np -import os -import tensorflow as tf - -# tf2.0 friendly -import warnings - -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=FutureWarning) - from tensorflow.keras.models import load_model, Sequential, Model - from tensorflow.keras.layers import Activation, Dense - from tensorflow.keras.layers import Input, Concatenate - -from l2rpn_baselines.utils import BaseDeepQ, TrainingParam - - -# This class implements the "Sof Actor Critic" model. -# It is a custom implementation, courtesy to Clement Goubet -# The original paper is: https://arxiv.org/abs/1801.01290 -class SAC_NN(BaseDeepQ): - """ - Constructs the desired soft actor critic network. 
- - Compared to other baselines shown elsewhere (*eg* :class:`l2rpn_baselines.DeepQSimple` or - :class:`l2rpn_baselines.DeepQSimple`) the implementation of the SAC is a bit more tricky. - - However, we demonstrate here that the use of :class:`l2rpn_baselines.utils.BaseDeepQ` with custom - parameters class (in this calse :class:`SAC_NNParam` is flexible enough to meet our needs. - - References - ----------- - Original paper: - https://arxiv.org/abs/1801.01290 - - modified for discrete action space: - https://arxiv.org/abs/1910.07207 - """ - def __init__(self, - nn_params, - training_param=None, - verbose=False): - if training_param is None: - training_param = TrainingParam() - BaseDeepQ.__init__(self, - nn_params, - training_param, - verbose=verbose) - - # TODO add as meta param the number of "Q" you want to use (here 2) - # TODO add as meta param size and types of the networks - self.average_reward = 0 - self.life_spent = 1 - self.qvalue_evolution = np.zeros((0,)) - self.Is_nan = False - - self.model_value_target = None - self.model_value = None - self.model_Q = None - self.model_Q2 = None - self.model_policy = None - - self.previous_size = 0 - self.previous_eyes = None - self.previous_arange = None - self.previous_size_train = 0 - self.previous_eyes_train = None - - # optimizers and learning rate - self.schedule_lr_policy = None - self.optimizer_policy = None - self.schedule_lr_Q = None - self.optimizer_Q = None - self.schedule_lr_Q2 = None - self.optimizer_Q2 = None - self.schedule_lr_value = None - self.optimizer_value = None - - self.construct_q_network() - - def _build_q_NN(self): - input_states = Input(shape=(self._observation_size,)) - input_action = Input(shape=(self._action_size,)) - - input_layer = Concatenate()([input_states, input_action]) - lay = input_layer - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes, self._nn_archi.activs)): - lay = Dense(size, name="layer_{}".format(lay_num))(lay) # put at self.action_size - lay = Activation(act)(lay) - - advantage = Dense(1, activation='linear')(lay) - - model = Model(inputs=[input_states, input_action], outputs=[advantage]) - return model - - def _build_model_value(self): - input_states = Input(shape=(self._observation_size,)) - - lay = input_states - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes_value, self._nn_archi.activs_value)): - lay = Dense(size)(lay) - lay = Activation(act)(lay) - - advantage = Dense(self._action_size, activation='relu')(lay) - state_value = Dense(1, activation='linear', name="state_value")(advantage) - model = Model(inputs=[input_states], outputs=[state_value]) - return model - - def construct_q_network(self): - """ - This constructs all the networks needed for the SAC agent. 
- """ - self.model_Q = self._build_q_NN() - self.schedule_lr_Q, self.optimizer_Q = self.make_optimiser() - self.model_Q.compile(loss='mse', optimizer=self.optimizer_Q) - - self.model_Q2 = self._build_q_NN() - self.schedule_lr_Q2, self.optimizer_Q2 = self.make_optimiser() - self.model_Q2.compile(loss='mse', optimizer=self.optimizer_Q2) - - # state value function approximation - self.model_value = self._build_model_value() - self.schedule_lr_value, self.optimizer_value = self.make_optimiser() - self._optimizer_model = self.optimizer_value - self.model_value.compile(loss='mse', optimizer=self.optimizer_value) - - self.model_value_target = self._build_model_value() - self.model_value_target.set_weights(self.model_value.get_weights()) - - # policy function approximation - self.model_policy = Sequential() - # proba of choosing action a depending on policy pi - input_states = Input(shape=(self._observation_size,)) - lay = input_states - for lay_num, (size, act) in enumerate(zip(self._nn_archi.sizes_policy, self._nn_archi.activs_policy)): - lay = Dense(size)(lay) - lay = Activation(act)(lay) - soft_proba = Dense(self._action_size, activation="softmax", kernel_initializer='uniform', name="soft_proba")(lay) - self.model_policy = Model(inputs=[input_states], outputs=[soft_proba]) - self.schedule_lr_policy, self.optimizer_policy = self.make_optimiser() - self.model_policy.compile(loss='categorical_crossentropy', optimizer=self.optimizer_policy) - - def _get_eye_pm(self, batch_size): - if batch_size != self.previous_size: - tmp = np.zeros((batch_size, self._action_size), dtype=np.float32) - self.previous_eyes = tmp - self.previous_arange = np.arange(batch_size) - self.previous_size = batch_size - return self.previous_eyes, self.previous_arange - - def predict_movement(self, data, epsilon, batch_size=None, training=False): - """ - predict the next movements in a vectorized fashion - """ - if batch_size is None: - batch_size = data.shape[0] - rand_val = np.random.random(data.shape[0]) - p_actions = self.model_policy(data, training=training).numpy() - opt_policy_orig = np.argmax(np.abs(p_actions), axis=-1) - opt_policy = 1.0 * opt_policy_orig - opt_policy[rand_val < epsilon] = np.random.randint(0, self._action_size, size=(np.sum(rand_val < epsilon))) - opt_policy = opt_policy.astype(np.int) - idx = np.arange(batch_size) - return opt_policy, p_actions[idx, opt_policy], p_actions - - def _get_eye_train(self, batch_size): - if batch_size != self.previous_size_train: - self.previous_eyes_train = np.repeat(np.eye(self._action_size), - batch_size * np.ones(self._action_size, dtype=np.int), - axis=0) - self.previous_eyes_train = tf.convert_to_tensor(self.previous_eyes_train, dtype=tf.float32) - self.previous_size_train = batch_size - return self.previous_eyes_train - - def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None): - """Trains networks to fit given parameters""" - if batch_size is None: - batch_size = s_batch.shape[0] - target = np.zeros((batch_size, 1)) - - # training of the action state value networks - last_action = np.zeros((batch_size, self._action_size)) - - # Save the graph just the first time - if tf_writer is not None: - tf.summary.trace_on() - # TODO is it s2 or s ? For me it should be s... 
- fut_action = self.model_value_target(s2_batch, training=True).numpy().reshape(-1) - # TODO ***_target should be for the Q function instead imho - - if tf_writer is not None: - with tf_writer.as_default(): - tf.summary.trace_export("model_value_target-graph", 0) - tf.summary.trace_off() - - # TODO is it rather `targets[:, a_batch]` - target[:, 0] = r_batch + (1 - d_batch) * self._training_param.discount_factor * fut_action - # target[:, a_batch] = r_batch + (1 - d_batch) * self._training_param.discount_factor * fut_action - loss = self.model_Q.train_on_batch([s_batch, last_action], target) - loss_2 = self.model_Q2.train_on_batch([s_batch, last_action], target) - - self.life_spent += 1 - temp = 1 / np.log(self.life_spent) / 2 - tiled_batch = np.tile(s_batch, (self._action_size, 1)) - tiled_batch_ts = tf.convert_to_tensor(tiled_batch) - # tiled_batch: output something like: batch, batch, batch - # TODO save that somewhere not to compute it each time, you can even save this in the - # TODO tensorflow graph! - tmp = self._get_eye_train(batch_size) - - action_v1_orig = self.model_Q.predict([tiled_batch_ts, tmp], batch_size=batch_size).reshape(batch_size, -1) - action_v2_orig = self.model_Q2.predict([tiled_batch_ts, tmp], batch_size=batch_size).reshape(batch_size, -1) - action_v1 = action_v1_orig - np.amax(action_v1_orig, axis=-1).reshape(batch_size, 1) - new_proba = np.exp(action_v1 / temp) / np.sum(np.exp(action_v1 / temp), axis=-1).reshape(batch_size, 1) - new_proba_ts = tf.convert_to_tensor(new_proba) - loss_policy = self.model_policy.train_on_batch(s_batch, new_proba_ts) - - target_pi = self.model_policy.predict(s_batch, batch_size=batch_size) - value_target = np.fmin(action_v1_orig[0, a_batch], action_v2_orig[0, a_batch]) - np.sum( - target_pi * np.log(target_pi + 1e-6)) - value_target_ts = tf.convert_to_tensor(value_target.reshape(-1, 1)) - loss_value = self.model_value.train_on_batch(s_batch, value_target_ts) - - self.Is_nan = np.isnan(loss) + np.isnan(loss_2) + np.isnan(loss_policy) + np.isnan(loss_value) - return np.all(np.isfinite(loss)) & np.all(np.isfinite(loss_2)) & np.all(np.isfinite(loss_policy)) & \ - np.all(np.isfinite(loss_value)) - - @staticmethod - def _get_path_model(path, name=None): - if name is None: - path_model = path - else: - path_model = os.path.join(path, name) - path_target_model = "{}_target".format(path_model) - path_modelQ = "{}_Q".format(path_model) - path_modelQ2 = "{}_Q2".format(path_model) - path_policy = "{}_policy".format(path_model) - return path_model, path_target_model, path_modelQ, path_modelQ2, path_policy - - def save_network(self, path, name=None, ext="h5"): - """ - Saves all the models with unique names - """ - path_model, path_target_model, path_modelQ, path_modelQ2, path_policy = self._get_path_model(path, name) - self.model_value.save('{}.{}'.format(path_model, ext)) - self.model_value_target.save('{}.{}'.format(path_target_model, ext)) - self.model_Q.save('{}.{}'.format(path_modelQ, ext)) - self.model_Q2.save('{}.{}'.format(path_modelQ2, ext)) - self.model_policy.save('{}.{}'.format(path_policy, ext)) - - def load_network(self, path, name=None, ext="h5"): - """ - We load all the models using the keras "load_model" function. 
- """ - path_model, path_target_model, path_modelQ, path_modelQ2, path_policy = self._get_path_model(path, name) - self.construct_q_network() - self.model_value.load_weights('{}.{}'.format(path_model, ext)) - self.model_value_target.load_weights('{}.{}'.format(path_target_model, ext)) - self.model_Q.load_weights('{}.{}'.format(path_modelQ, ext)) - self.model_Q2.load_weights('{}.{}'.format(path_modelQ2, ext)) - self.model_policy.load_weights('{}.{}'.format(path_policy, ext)) - if self.verbose: - print("Succesfully loaded network.") - - def target_train(self): - """ - This update the target model. - """ - model_weights = self.model_value.get_weights() - target_model_weights = self.model_value_target.get_weights() - for i in range(len(model_weights)): - target_model_weights[i] = self._training_param.tau * model_weights[i] + (1 - self._training_param.tau) * \ - target_model_weights[i] - self.model_value_target.set_weights(model_weights) diff --git a/l2rpn_baselines/SAC/SAC_NNParam.py b/l2rpn_baselines/SAC/SAC_NNParam.py deleted file mode 100644 index 7e008ac..0000000 --- a/l2rpn_baselines/SAC/SAC_NNParam.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. -import copy - -from l2rpn_baselines.utils import NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN - - -class SAC_NNParam(NNParam): - """ - - Attributes - ---------- - sizes_value: ``list`` - List of integer, each one representing the size of the hidden layer for the "value" neural network. - - activs_value: ``list`` - List of ``str`` for each hidden layer of the "value" neural network, indicates which hidden layer to use - - sizes_policy: ``list`` - List of integers, each reprenseting the size of the hidden layer for the "policy" network. 
- - activs_policy: ``list`` - List of ``str``: The activation functions (for each layer) of the policy network - - """ - _int_attr = copy.deepcopy(NNParam._int_attr) - _float_attr = copy.deepcopy(NNParam._float_attr) - _str_attr = copy.deepcopy(NNParam._str_attr) - _list_float = copy.deepcopy(NNParam._list_float) - _list_str = copy.deepcopy(NNParam._list_str) - _list_int = copy.deepcopy(NNParam._list_int) - - _list_str += ["activs_value", "activs_policy"] - _list_int += ["sizes_value", "sizes_policy"] - - nn_class = SAC_NN - - def __init__(self, - action_size, - observation_size, # TODO this might not be usefull - sizes, - activs, - list_attr_obs, - sizes_value, - activs_value, - sizes_policy, - activs_policy - ): - NNParam.__init__(self, - action_size, - observation_size, # TODO this might not be usefull - sizes, - activs, - list_attr_obs - ) - self.sizes_value = sizes_value - self.activs_value = activs_value - self.sizes_policy = sizes_policy - self.activs_policy = activs_policy diff --git a/l2rpn_baselines/SAC/__init__.py b/l2rpn_baselines/SAC/__init__.py deleted file mode 100644 index 8ca58cf..0000000 --- a/l2rpn_baselines/SAC/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -__all__ = [ - "SAC", - "evaluate", - "train", - "SAC_NNParam" -] - -from l2rpn_baselines.SAC.SAC import SAC -from l2rpn_baselines.SAC.evaluate import evaluate -from l2rpn_baselines.SAC.train import train -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam diff --git a/l2rpn_baselines/SAC/evaluate.py b/l2rpn_baselines/SAC/evaluate.py deleted file mode 100644 index 3296245..0000000 --- a/l2rpn_baselines/SAC/evaluate.py +++ /dev/null @@ -1,204 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import os -import tensorflow as tf - -from grid2op.MakeEnv import make -from grid2op.Runner import Runner -from grid2op.Reward import * -from grid2op.Action import * - -from l2rpn_baselines.utils.save_log_gif import save_log_gif -from l2rpn_baselines.SAC.SAC import SAC, DEFAULT_NAME -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN - -DEFAULT_LOGS_DIR = "./logs-eval/do-nothing-baseline" -DEFAULT_NB_EPISODE = 1 -DEFAULT_NB_PROCESS = 1 -DEFAULT_MAX_STEPS = -1 - - -def evaluate(env, - name=DEFAULT_NAME, - load_path=None, - logs_path=DEFAULT_LOGS_DIR, - nb_episode=DEFAULT_NB_EPISODE, - nb_process=DEFAULT_NB_PROCESS, - max_steps=DEFAULT_MAX_STEPS, - verbose=False, - save_gif=False): - """ - How to evaluate the performances of the trained SAC agent. - - Parameters - ---------- - env: :class:`grid2op.Environment` - The environment on which you evaluate your agent. - - name: ``str`` - The name of the trained baseline - - load_path: ``str`` - Path where the agent has been stored - - logs_path: ``str`` - Where to write the results of the assessment - - nb_episode: ``str`` - How many episodes to run during the assessment of the performances - - nb_process: ``int`` - On how many process the assessment will be made. 
(setting this > 1 can lead to some speed ups but can be - unstable on some plaform) - - max_steps: ``int`` - How many steps at maximum your agent will be assessed - - verbose: ``bool`` - Currently un used - - save_gif: ``bool`` - Whether or not you want to save, as a gif, the performance of your agent. It might cause memory issues (might - take a lot of ram) and drastically increase computation time. - - Returns - ------- - agent: :class:`l2rpn_baselines.utils.DeepQAgent` - The loaded agent that has been evaluated thanks to the runner. - - res: ``list`` - The results of the Runner on which the agent was tested. - - - Examples - ------- - You can evaluate a DeepQSimple this way: - - .. code-block:: python - - from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward - from l2rpn_baselines.SAC import eval - - # Create dataset env - env = make("l2rpn_case14_sandbox", - reward_class=L2RPNSandBoxScore, - other_rewards={ - "reward": L2RPNReward - }) - - # Call evaluation interface - evaluate(env, - name="MyAwesomeAgent", - load_path="/WHERE/I/SAVED/THE/MODEL", - logs_path=None, - nb_episode=10, - nb_process=1, - max_steps=-1, - verbose=False, - save_gif=False) - """ - - # Limit gpu usage - physical_devices = tf.config.list_physical_devices('GPU') - if len(physical_devices): - tf.config.experimental.set_memory_growth(physical_devices[0], True) - - runner_params = env.get_params_for_runner() - runner_params["verbose"] = verbose - - if load_path is None: - raise RuntimeError("Cannot evaluate a model if there is nothing to be loaded.") - path_model, path_target_model = SAC_NN.get_path_model(load_path, name) - nn_archi = SAC_NNParam.from_json(os.path.join(path_model, "nn_architecture.json")) - - # Run - # Create agent - agent = SAC(action_space=env.action_space, - name=name, - store_action=nb_process == 1, - nn_archi=nn_archi, - observation_space=env.observation_space) - - # Load weights from file - agent.load(load_path) - - # Print model summary - stringlist = [] - agent.deep_q.model_value.summary(print_fn=lambda x: stringlist.append(x)) - short_model_summary = "\n".join(stringlist) - - if verbose: - print("Value model: {}".format(short_model_summary)) - - # Build runner - runner = Runner(**runner_params, - agentClass=None, - agentInstance=agent) - - # Run - os.makedirs(logs_path, exist_ok=True) - res = runner.run(path_save=logs_path, - nb_episode=nb_episode, - nb_process=nb_process, - max_iter=max_steps, - pbar=verbose) - - # Print summary - - if verbose: - print("Evaluation summary:") - for _, chron_name, cum_reward, nb_time_step, max_ts in res: - msg_tmp = "chronics at: {}".format(chron_name) - msg_tmp += "\ttotal score: {:.6f}".format(cum_reward) - msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts) - print(msg_tmp) - - if len(agent.dict_action): - # I output some of the actions played - print("The agent played {} different action".format(len(agent.dict_action))) - for id_, (nb, act, types) in agent.dict_action.items(): - print("Action with ID {} was played {} times".format(id_, nb)) - print("{}".format(act)) - print("-----------") - - if save_gif: - if verbose: - print("Saving the gif of the episodes") - save_log_gif(logs_path, res) - - return agent, res - - -if __name__ == "__main__": - from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward - from l2rpn_baselines.utils import cli_eval - - # Parse command line - args = cli_eval().parse_args() - - # Create dataset env - env = make(args.env_name, - reward_class=L2RPNSandBoxScore, - other_rewards={ - "reward": L2RPNReward - }) 
- - # Call evaluation interface - evaluate(env, - name=args.name, - load_path=os.path.abspath(args.load_path), - logs_path=args.logs_dir, - nb_episode=args.nb_episode, - nb_process=args.nb_process, - max_steps=args.max_steps, - verbose=args.verbose, - save_gif=args.save_gif) diff --git a/l2rpn_baselines/SAC/train.py b/l2rpn_baselines/SAC/train.py deleted file mode 100755 index da44016..0000000 --- a/l2rpn_baselines/SAC/train.py +++ /dev/null @@ -1,347 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2020, RTE (https://www.rte-france.com) -# See AUTHORS.txt -# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0. -# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file, -# you can obtain one at http://mozilla.org/MPL/2.0/. -# SPDX-License-Identifier: MPL-2.0 -# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. - -import os -import tensorflow as tf -import warnings - -from l2rpn_baselines.utils import cli_train -from l2rpn_baselines.SAC.SAC import SAC, DEFAULT_NAME -from l2rpn_baselines.SAC.SAC_NNParam import SAC_NNParam -from l2rpn_baselines.SAC.SAC_NN import SAC_NN -from l2rpn_baselines.utils import TrainingParam -from l2rpn_baselines.utils.waring_msgs import _WARN_GPU_MEMORY - - -def train(env, - name=DEFAULT_NAME, - iterations=1, - save_path=None, - load_path=None, - logs_dir=None, - training_param=None, - filter_action_fun=None, - verbose=True, - kwargs_converters={}, - kwargs_archi={}): - """ - This function implements the "training" part of the balines "DeepQSimple". - - Parameters - ---------- - env: :class:`grid2op.Environment` - Then environment on which you need to train your agent. - - name: ``str``` - The name of your agent. - - iterations: ``int`` - For how many iterations (steps) do you want to train your agent. NB these are not episode, these are steps. - - save_path: ``str`` - Where do you want to save your baseline. - - load_path: ``str`` - If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded - some of the argument provided to this function will not be used. - - logs_dir: ``str`` - Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them. - - verbose: ``bool`` - If you want something to be printed on the terminal (a better logging strategy will be put at some point) - - training_param: :class:`l2rpn_baselines.utils.TrainingParam` - The parameters describing the way you will train your model. - - filter_action_fun: ``function`` - A function to filter the action space. See - `IdToAct.filter_action `_ - documentation. - - kwargs_converters: ``dict`` - A dictionary containing the key-word arguments pass at this initialization of the - :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent. - - kwargs_archi: ``dict`` - Key word arguments used for making the :class:`DeepQ_NNParam` object that will be used to build the baseline. - - Returns - ------- - - baseline: :class:`DeepQSimple` - The trained baseline. - - - .. _Example-sac: - - Examples - --------- - Here is an example on how to train a SAC baseline. - - First define a python script, for example - - .. 
code-block:: python - - import grid2op - from grid2op.Reward import L2RPNReward - from l2rpn_baselines.utils import TrainingParam, NNParam - from l2rpn_baselines.SAC import train - - # define the environment - env = grid2op.make("l2rpn_case14_sandbox", - reward_class=L2RPNReward) - - # use the default training parameters - tp = TrainingParam() - - # this will be the list of what part of the observation I want to keep - # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] - - # neural network architecture - observation_size = NNParam.get_obs_size(env, li_attr_obs_X) - sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - sizes_v = [800, 800] # sizes of each hidden layers - sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - # select some part of the action - # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False - } - # define the name of the model - nm_ = "AnneOnymous" - try: - train(env, - name=nm_, - iterations=10000, - save_path="/WHERE/I/SAVED/THE/MODEL", - load_path=None, - logs_dir="/WHERE/I/SAVED/THE/LOGS", - training_param=tp, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - finally: - env.close() - - """ - - # Limit gpu usage - try: - physical_devices = tf.config.list_physical_devices('GPU') - if len(physical_devices) > 0: - tf.config.experimental.set_memory_growth(physical_devices[0], True) - except AttributeError: - # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p - try: - physical_devices = tf.config.experimental.list_physical_devices('GPU') - if len(physical_devices) > 0: - tf.config.experimental.set_memory_growth(physical_devices[0], True) - except Exception: - warnings.warn(_WARN_GPU_MEMORY) - except Exception: - warnings.warn(_WARN_GPU_MEMORY) - - if training_param is None: - training_param = TrainingParam() - - # compute the proper size for the converter - kwargs_archi["action_size"] = SAC.get_action_size(env.action_space, filter_action_fun, kwargs_converters) - - if load_path is not None: - path_model, path_target_model = SAC_NN.get_path_model(load_path, name) - if verbose: - print("INFO: Reloading a model, the architecture parameters provided will be ignored") - nn_archi = SAC_NNParam.from_json(os.path.join(path_model, "nn_architecture.json")) - else: - nn_archi = SAC_NNParam(**kwargs_archi) - - baseline = SAC(action_space=env.action_space, - nn_archi=nn_archi, - name=name, - istraining=True, - verbose=verbose, - **kwargs_converters - ) - - if load_path is not None: - if verbose: - print("INFO: Reloading a model, training parameters will be ignored") - baseline.load(load_path) - training_param = 
baseline._training_param - - baseline.train(env, - iterations, - save_path=save_path, - logdir=logs_dir, - training_param=training_param) - # as in our example (and in our explanation) we recommend to save the mode regurlarly in the "train" function - # it is not necessary to save it again here. But if you chose not to follow these advice, it is more than - # recommended to save the "baseline" at the end of this function with: - # baseline.save(path_save) - - -if __name__ == "__main__": - # import grid2op - import numpy as np - from grid2op.Parameters import Parameters - from grid2op import make - from grid2op.Reward import L2RPNReward - import re - try: - from lightsim2grid.LightSimBackend import LightSimBackend - backend = LightSimBackend() - except: - from grid2op.Backend import PandaPowerBackend - backend = PandaPowerBackend() - - args = cli_train().parse_args() - - # is it highly recommended to modify the reward depening on the algorithm. - # for example here i will push my algorithm to learn that plyaing illegal or ambiguous action is bad - class MyReward(L2RPNReward): - def initialize(self, env): - self.reward_min = 0.0 - self.reward_max = 1.0 - - def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous): - if has_error or is_illegal or is_ambiguous: - # previous action was bad - res = self.reward_min - elif is_done: - # really strong reward if an episode is over without game over - res = self.reward_max - else: - res = super().__call__(action, env, has_error, is_done, is_illegal, is_ambiguous) - res /= env.n_line - if not np.isfinite(res): - res = self.reward_min - return res - - # Use custom params - - # Create grid2op game environement - env_init = None - try: - from grid2op.Chronics import MultifolderWithCache - except: - from grid2op.Chronics import MultiFolder - MultifolderWithCache = MultiFolder - - game_param = Parameters() - game_param.NB_TIMESTEP_COOLDOWN_SUB = 2 - game_param.NB_TIMESTEP_COOLDOWN_LINE = 2 - env = make(args.env_name, - param=game_param, - reward_class=MyReward, - backend=backend, - chronics_class=MultifolderWithCache - ) - # env.chronics_handler.set_max_iter(7*288) - try: - env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((03)|(72)|(57))$", x) is not None) - env.chronics_handler.real_data.reset() - except RuntimeError as exc_: - raise exc_ - except AttributeError as exc_: - # not available in all grid2op version - pass - # env.chronics_handler.real_data. - env_init = env - if args.nb_env > 1: - from l2rpn_baselines.utils import make_multi_env - env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env)) - - tp = TrainingParam() - - # NN training - tp.lr = 1e-4 - tp.lr_decay_steps = 30000 - tp.minibatch_size = 256 - tp.update_freq = 128 - - # limit the number of time steps played per scenarios - tp.step_increase_nb_iter = 100 # None to deactivate it - tp.min_iter = 10 - tp.update_nb_iter = 100 # once 100 scenarios are solved, increase of "step_increase_nb_iter" - - # oversampling hard scenarios - tp.oversampling_rate = 3 - - # experience replay - tp.buffer_size = 1000000 - - # e greedy - tp.min_observation = 10000 - tp.initial_epsilon = 0.4 - tp.final_epsilon = 1./(2*7*288.) - tp.step_for_final_epsilon = int(1e5) - - # don't start always at the same hour (if not None) otherwise random sampling, see docs - tp.random_sample_datetime_start = None - - # saving, logging etc. 
- tp.save_model_each = 10000 - tp.update_tensorboard_freq = 256 - - li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q", - "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line", - "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"] - - # nn architecture - observation_size = SAC_NNParam.get_obs_size(env_init, li_attr_obs_X) - sizes_q = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - sizes_v = [800, 800] # sizes of each hidden layers - sizes_pol = [800, 800, 800, 494, 494, 494] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - # which actions i keep - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False, - } - nm_ = args.name if args.name is not None else DEFAULT_NAME - try: - train(env, - name=nm_, - iterations=args.num_train_steps, - save_path=args.save_path, - load_path=args.load_path, - logs_dir=args.logs_dir, - training_param=tp, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - finally: - env.close() - if args.nb_env > 1: - env_init.close() diff --git a/l2rpn_baselines/SACOld/SACOld.py b/l2rpn_baselines/SACOld/SACOld.py index ab5ad39..0f28e82 100644 --- a/l2rpn_baselines/SACOld/SACOld.py +++ b/l2rpn_baselines/SACOld/SACOld.py @@ -7,7 +7,7 @@ # This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions. 
from l2rpn_baselines.utils import DeepQAgent -from l2rpn_baselines.SAC.SAC_NN import SAC_NN +from l2rpn_baselines.SACOld.SACOld_NN import SACOld_NN DEFAULT_NAME = "SACOld" diff --git a/l2rpn_baselines/__init__.py b/l2rpn_baselines/__init__.py index 54668e5..b12a686 100644 --- a/l2rpn_baselines/__init__.py +++ b/l2rpn_baselines/__init__.py @@ -6,7 +6,6 @@ "SliceRDQN", "DeepQSimple", "DuelQSimple", - "SAC", "LeapNetEncoded", # Backward compatibility "SACOld", @@ -18,4 +17,4 @@ "utils" ] -__version__ = "0.4.4" +__version__ = "0.5.0" diff --git a/l2rpn_baselines/test/test_import.py b/l2rpn_baselines/test/test_import.py index ba16d6b..4c0456c 100644 --- a/l2rpn_baselines/test/test_import.py +++ b/l2rpn_baselines/test/test_import.py @@ -45,11 +45,6 @@ def load_module(self): return "DeepQSimple" -class TestSAC(TestImport, unittest.TestCase): - def load_module(self): - return "SAC" - - class TestSACOld(TestImport, unittest.TestCase): def load_module(self): return "SACOld" diff --git a/l2rpn_baselines/test/test_train_eval.py b/l2rpn_baselines/test/test_train_eval.py index b9f91a5..abd6998 100644 --- a/l2rpn_baselines/test/test_train_eval.py +++ b/l2rpn_baselines/test/test_train_eval.py @@ -25,8 +25,6 @@ from l2rpn_baselines.DuelQSimple import evaluate as eval_d3qs from l2rpn_baselines.SACOld import train as train_sacold from l2rpn_baselines.SACOld import evaluate as eval_sacold -from l2rpn_baselines.SAC import train as train_sac -from l2rpn_baselines.SAC import evaluate as eval_sac from l2rpn_baselines.DuelQLeapNet import train as train_leap from l2rpn_baselines.DuelQLeapNet import evaluate as eval_leap from l2rpn_baselines.LeapNetEncoded import train as train_leapenc @@ -343,62 +341,6 @@ def test_train_eval(self): save_gif=False) -class TestSAC(unittest.TestCase): - def test_train_eval(self): - tp = TrainingParam() - tp.buffer_size = 100 - tp.minibatch_size = 8 - tp.update_freq = 32 - tp.min_observation = 32 - tmp_dir = tempfile.mkdtemp() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - env = grid2op.make("rte_case5_example", test=True) - li_attr_obs_X = ["prod_p", "load_p", "rho"] - - # neural network architecture - observation_size = NNParam.get_obs_size(env, li_attr_obs_X) - sizes_q = [100, 50, 10] # sizes of each hidden layers - sizes_v = [100, 100] # sizes of each hidden layers - sizes_pol = [100, 10] # sizes of each hidden layers - kwargs_archi = {'observation_size': observation_size, - 'sizes': sizes_q, - 'activs': ["relu" for _ in range(len(sizes_q))], - "list_attr_obs": li_attr_obs_X, - "sizes_value": sizes_v, - "activs_value": ["relu" for _ in range(len(sizes_v))], - "sizes_policy": sizes_pol, - "activs_policy": ["relu" for _ in range(len(sizes_pol))] - } - - kwargs_converters = {"all_actions": None, - "set_line_status": False, - "change_bus_vect": True, - "set_topo_vect": False - } - nm_ = "AnneOnymous" - train_sac(env, - name=nm_, - iterations=100, - save_path=tmp_dir, - load_path=None, - logs_dir=tmp_dir, - training_param=tp, - verbose=False, - kwargs_converters=kwargs_converters, - kwargs_archi=kwargs_archi) - - baseline_2 = eval_sac(env, - name=nm_, - load_path=tmp_dir, - logs_path=tmp_dir, - nb_episode=1, - nb_process=1, - max_steps=30, - verbose=False, - save_gif=False) - - class TestLeapNet(unittest.TestCase): def test_train_eval(self): tp = TrainingParam() diff --git a/setup.py b/setup.py index dcaa052..4d90368 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ import setuptools from setuptools import setup -__version__ = "0.4.4" +__version__ = "0.5.0" 
 pkgs = {

From f0403bfaef75867b7713957c5df475d9ec175a90 Mon Sep 17 00:00:00 2001
From: BDonnot
Date: Tue, 18 Aug 2020 15:11:45 +0200
Subject: [PATCH 3/3] updating the changelog

---
 CHANGELOG.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 32cf9de..e7441f2 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,8 +4,10 @@ Change Log
 --------
 - stack multiple states in `utils/DeepQAgent`
 
-[0.5.0] - 2020-08-??
+[0.5.0] - 2020-08-18
 --------------------
+- [BREAKING] remove the SAC baseline that was not correct. For backward compatibility, its code
+  can still be accessed with SACOld
 - [FIXED] the counting of the action types frequency in tensorboard (for some baselines)
 - [FIXED] a broken Replay buffer `utils.ReplayBuffer` (used in some baselines)
 - [FIXED] a bug in using multiple environments for some baselines
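The [BREAKING] entry above points users of the removed SAC baseline to SACOld. A minimal migration sketch, using only the import paths that appear in the test suite kept by this series (the full public API of the SACOld package is assumed to mirror the old SAC one):

.. code-block:: python

    # hypothetical migration snippet: replace imports from `l2rpn_baselines.SAC`
    # with the backward-compatible `l2rpn_baselines.SACOld` package
    from l2rpn_baselines.SACOld import train as train_sacold
    from l2rpn_baselines.SACOld import evaluate as eval_sacold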