Merge pull request #57 from rte-france/bd-dev
version 0.8.0
BDonnot authored Jul 24, 2023
2 parents fd742a9 + 65ff960 commit b5d9f41
Showing 37 changed files with 754 additions and 71 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,5 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
**__pycache__/
*.py[cod]
*$py.class

@@ -195,3 +196,7 @@ examples/ppo_stable_baselines/preprocess_act.json
examples/ppo_stable_baselines/preprocess_obs.json
l2rpn_baselines/test/END_RESULT_DATAFRAME.csv
l2rpn_baselines/test/logs-eval/
examples/ppo_stable_baselines/saved_model_2023/FirstAgent/FirstAgent_*_steps.zip
examples/ppo_stable_baselines_idf_2023/saved_model_2023/FirstAgent/FirstAgent_*_steps.zip
test_cmd/
test_rho_select_scen.py
11 changes: 11 additions & 0 deletions CHANGELOG.rst
@@ -15,6 +15,17 @@ Change Log
- train somewhere a working baseline (that does better than do nothing)
- show an example of a baseline that uses a GNN

[0.8.0] - 2023-07-24
-------------------------
- [BREAKING] remove support for gym, use gymnasium instead (if you still want
  gym, it should be fine to install `l2rpn-baselines` with `pip install l2rpn_baselines --no-deps`
  and to install gym separately, but we do not recommend doing so)
- [BREAKING] change the signature of the `GymEnvWithHeuristics.fix_action` method
- [FIXED] a "bug" related to the gymnasium / gym handling in grid2op
- [FIXED] the way the learning rate is retrieved from Adam (in keras)
- [FIXED] `PPO_RLLIB` with the new versions of ray, rllib, gymnasium, etc.
- [ADDED] an example for training a model on the "l2rpn_idf_2023" environment

[0.7.0] - 2023-07-13
------------------------
- [ADDED] the "topo oracle agent" (contrib)
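A note on the gymnasium migration listed above: the sketch below is illustrative only (not part of this commit) and shows what downstream code looks like once gym is replaced by gymnasium; the environment name is an arbitrary example and `GymEnv` is grid2op's `gym_compat` wrapper.

import grid2op
from grid2op.gym_compat import GymEnv  # behaves as a gymnasium.Env when gymnasium is installed

env = grid2op.make("l2rpn_case14_sandbox")  # any grid2op environment
gym_env = GymEnv(env)

obs, info = gym_env.reset()  # gymnasium API: reset returns (obs, info)
# gymnasium API: step returns (obs, reward, terminated, truncated, info)
obs, reward, terminated, truncated, info = gym_env.step(gym_env.action_space.sample())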
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -22,8 +22,8 @@
author = 'Benjamin DONNOT'

# The full version, including alpha/beta/rc tags
release = '0.7.0'
version = '0.7'
release = '0.8.0'
version = '0.8'

# -- General configuration ---------------------------------------------------

13 changes: 11 additions & 2 deletions examples/ppo_stable_baselines/A_prep_env.py
@@ -14,7 +14,7 @@
import grid2op
from grid2op.dtypes import dt_int
from grid2op.Agent import RecoPowerlineAgent
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021, ScoreL2RPN2023
from lightsim2grid import LightSimBackend
import numpy as np

@@ -23,6 +23,10 @@
env_name = "l2rpn_wcci_2022"
SCOREUSED = ScoreL2RPN2022 # ScoreICAPS2021

env_name = "l2rpn_idf_2023"
SCOREUSED = ScoreL2RPN2023 # ScoreICAPS2021


name_stats = "_reco_powerline"
nb_process_stats = 4 if not is_windows else 1
# if you still want to use multiprocessing on windows
@@ -137,7 +141,12 @@ def get_env_seed(env_name: str):
if nm_ == nm_val:
# save the normalization parameters from the validation set
dict_ = {"subtract": {}, 'divide': {}}
for attr_nm in ["gen_p", "load_p", "p_or", "rho"]:
for attr_nm in ["gen_p", "load_p", "p_or", "rho",
"timestep_overflow", "line_status",
"actual_dispatch", "target_dispatch",
"storage_charge", "storage_power",
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]:
avg_ = stats_reco.get(attr_nm)[0].mean(axis=0)
std_ = stats_reco.get(attr_nm)[0].std(axis=0)
dict_["subtract"][attr_nm] = [float(el) for el in avg_]
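For context on the hunk above: the `subtract` / `divide` statistics written to `preprocess_obs.json` standardize each kept observation attribute. A hedged sketch of how such a file can be consumed, assuming `env` and `gym_env` are built as in the training script (`BoxGymObsSpace` from grid2op's `gym_compat` accepts `subtract` / `divide` keyword arguments):

import json
from grid2op.gym_compat import BoxGymObsSpace

with open("preprocess_obs.json", "r", encoding="utf-8") as f:
    obs_space_kwargs = json.load(f)  # {"subtract": {...}, "divide": {...}}

# each kept attribute is shifted by its mean and divided by its standard
# deviation, both computed on the validation-set statistics above
gym_env.observation_space = BoxGymObsSpace(env.observation_space,
                                           attr_to_keep=["gen_p", "load_p", "p_or", "rho"],
                                           **obs_space_kwargs)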
14 changes: 8 additions & 6 deletions examples/ppo_stable_baselines/B_train_agent.py
@@ -21,7 +21,9 @@
from l2rpn_baselines.utils import GymEnvWithReco, GymEnvWithRecoWithDN

env_name = "l2rpn_wcci_2022_train"
save_path = "./saved_model"

env_name = "l2rpn_idf_2023_train"
save_path = "./saved_model_2023"
name = "FirstAgent"
gymenv_class = GymEnvWithRecoWithDN # uses the heuristics to do nothing if the grid is not at risk and to reconnect powerlines automatically
max_iter = 7 * 24 * 12 # None to deactivate it
@@ -124,7 +126,6 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
# curtailment part of the observation
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]
TODO = ...
# same here you can change it as you please
act_attr_to_keep = ["redispatch", "curtail", "set_storage"]
# parameters for the learning
@@ -156,15 +157,16 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
param.LIMIT_INFEASIBLE_CURTAILMENT_STORAGE_ACTION = True
env.change_parameters(param)

if max_iter is not None:
env.set_max_iter(max_iter) # one week
obs = env.reset()
    # train on the whole month of February, why not?
env.chronics_handler.real_data.set_filter(lambda x: re.match(r".*2050-02-.*$", x) is not None)
env.chronics_handler.real_data.set_filter(lambda x: re.match(r".*2035-02-.*$", x) is not None)
env.chronics_handler.real_data.reset()
# see https://grid2op.readthedocs.io/en/latest/environment.html#optimize-the-data-pipeline
# for more information !

if max_iter is not None:
env.set_max_iter(max_iter) # one week
obs = env.reset()

print("environment loaded !")
trained_agent = train(
env,
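The `train(...)` call truncated above is where the files written by `A_prep_env.py` enter the pipeline. A sketch of the glue, under the assumption that `train` is `l2rpn_baselines.PPO_SB3.train` and that it forwards `obs_space_kwargs` / `act_space_kwargs` to the gym spaces (as this example does; parameter names may vary across versions):

import json
from l2rpn_baselines.PPO_SB3 import train

with open("preprocess_obs.json", "r", encoding="utf-8") as f:
    obs_space_kwargs = json.load(f)  # normalization computed by A_prep_env.py
with open("preprocess_act.json", "r", encoding="utf-8") as f:
    act_space_kwargs = json.load(f)  # scaling for redispatch / set_storage

trained_agent = train(env,
                      name=name,
                      save_path=save_path,
                      gymenv_class=gymenv_class,
                      obs_attr_to_keep=obs_attr_to_keep,
                      act_attr_to_keep=act_attr_to_keep,
                      obs_space_kwargs=obs_space_kwargs,
                      act_space_kwargs=act_space_kwargs)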
17 changes: 10 additions & 7 deletions examples/ppo_stable_baselines/C_evaluate_trained_model.py
@@ -10,7 +10,7 @@
import numpy as np

import grid2op
from grid2op.utils import ScoreL2RPN2022
from grid2op.utils import ScoreL2RPN2023
from grid2op.Agent import RecoPowerlineAgent

from lightsim2grid import LightSimBackend
@@ -23,13 +23,15 @@
# and use a different parameter for evaluation than the one used for
# training.

env_name = "l2rpn_wcci_2022_val"
SCOREUSED = ScoreL2RPN2022
# env_name = "l2rpn_wcci_2022_val"
# SCOREUSED = ScoreL2RPN2022
env_name = "l2rpn_idf_2023_val"
SCOREUSED = ScoreL2RPN2023

agent_name = name
nb_scenario = 10
nb_process_stats = 1
load_path = "./saved_model"
load_path = "./saved_model_2023"
iter_num = None # put None for the latest version
verbose = True

@@ -61,6 +63,7 @@ def get_ts_survived_dn(env_name):
res -= 1 # the first observation (after reset) is counted as a step in the runner
return res


def get_ts_survived_reco(env_name):
dict_ = _aux_get_env(env_name, name_stat=name_stats)
res = []
@@ -103,7 +106,7 @@ def get_ts_survived_reco(env_name):
scores_r, n_played_r, total_ts_r = my_score.get(RecoPowerlineAgent(env_val.action_space))
scores, n_played, total_ts = my_score.get(my_agent)

res_scores = {"scores": [float(score) for score in scores],
res_scores = {"scores": [float(score[0]) for score in scores],
"n_played": [int(el) for el in n_played],
"total_ts": [int(el) for el in total_ts]}

@@ -112,7 +115,7 @@ def get_ts_survived_reco(env_name):
for score, my_ts, dn_ts in zip(scores, n_played, dn_ts_survived):
print(f"\t{':-)' if my_ts >= dn_ts else ':-('}:"
f"\n\t\t- I survived {my_ts} steps vs {dn_ts} for do nothing ({my_ts - dn_ts})"
f"\n\t\t- my score is {score:.2f} (do nothing is 0.)")
f"\n\t\t- my score is {score[0]:.2f} (do nothing is 15.)")
best_than_dn += my_ts >= dn_ts
print(f"The agent \"{agent_name}\" beats \"do nothing\" baseline in {best_than_dn} out of {len(dn_ts_survived)} episodes")

@@ -121,6 +124,6 @@ def get_ts_survived_reco(env_name):
for score, my_ts, reco_ts, score_ in zip(scores, n_played, reco_ts_survived, scores_r):
print(f"\t{':-)' if my_ts >= reco_ts else ':-('}:"
f"\n\t\t- I survived {my_ts} steps vs {reco_ts} for reco powerline ({my_ts - reco_ts})"
f"\n\t\t- my score is {score:.2f} (reco powerline: {score_:.2f})")
f"\n\t\t- my score is {score[0]:.2f} (reco powerline: {score_[0]:.2f})")
best_than_reco += my_ts >= reco_ts
print(f"The agent \"{agent_name}\" beats \"reco powerline\" baseline in {best_than_reco} out of {len(reco_ts_survived)} episodes")
@@ -0,0 +1,2 @@
I have encoded the action space !
DO NOT MODIFY !
@@ -0,0 +1,2 @@
I have encoded the observation space !
DO NOT MODIFY !
Binary file not shown.
@@ -0,0 +1 @@
["redispatch", "curtail", "set_storage"]
@@ -0,0 +1 @@
["month", "day_of_week", "hour_of_day", "minute_of_hour", "gen_p", "load_p", "p_or", "rho", "timestep_overflow", "line_status", "actual_dispatch", "target_dispatch", "storage_charge", "storage_power", "curtailment", "curtailment_limit", "gen_p_before_curtail"]
161 changes: 161 additions & 0 deletions examples/ppo_stable_baselines_idf_2023/A_prep_env.py
@@ -0,0 +1,161 @@
# Copyright (c) 2020-2022, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.

# this file needs to be run only once, it might take a while !
# we show it here because we want to include the "full stack"

import os
import json
import sys
import numpy as np
import grid2op
from grid2op.dtypes import dt_int
from grid2op.Agent import RecoPowerlineAgent
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021, ScoreL2RPN2023
from lightsim2grid import LightSimBackend

is_windows = sys.platform.startswith("win32")

env_name = "l2rpn_idf_2023"
SCOREUSED = ScoreL2RPN2023

name_stats = "_reco_powerline"
nb_process_stats = 4 if not is_windows else 1
# if you still want to use multiprocessing on windows
# have a look at the `env.generate_classes()` function
verbose = 1
deep_copy = is_windows # force the deep copy on windows (due to permission issue in symlink in windows)


def _aux_get_env(env_name, dn=True, name_stat=None):
path_ = grid2op.get_current_local_dir()
path_env = os.path.join(path_, env_name)
if not os.path.exists(path_env):
raise RuntimeError(f"The environment \"{env_name}\" does not exist.")

if SCOREUSED == ScoreICAPS2021:
path_dn = os.path.join(path_env, "_statistics_icaps2021_dn")
else:
path_dn = os.path.join(path_env, "_statistics_l2rpn_dn")

if not os.path.exists(path_dn):
raise RuntimeError("The folder _statistics_icaps2021_dn (or _statistics_l2rpn_dn) used for computing the score do not exist")
path_reco = os.path.join(path_env, "_statistics_l2rpn_no_overflow_reco")
if not os.path.exists(path_reco):
raise RuntimeError("The folder _statistics_l2rpn_no_overflow_reco used for computing the score do not exist")

if name_stat is None:
if dn:
path_metadata = os.path.join(path_dn, "metadata.json")
else:
path_metadata = os.path.join(path_reco, "metadata.json")
else:
path_stat = os.path.join(path_env, EpisodeStatistics.get_name_dir(name_stat))
if not os.path.exists(path_stat):
raise RuntimeError(f"No folder associated with statistics {name_stat}")
path_metadata = os.path.join(path_stat, "metadata.json")

if not os.path.exists(path_metadata):
raise RuntimeError("No metadata can be found for the statistics you wanted to compute.")

with open(path_metadata, "r", encoding="utf-8") as f:
dict_ = json.load(f)

return dict_


def get_env_seed(env_name: str):
"""This function ensures that you can reproduce the results of the computed scenarios.
    It forces the seeds of the environment, during evaluation, to be the same as the ones used when computing the score.
As environments are stochastic in grid2op, it is very important that you use this function (or a similar one) before
computing the scores of your agent.
Args:
env_name (str): The environment name on which you want to retrieve the seeds used
Raises:
RuntimeError: When it is not possible to retrieve the seeds (for example when the "statistics" has not been computed)
Returns:
        list: the environment seeds used for each scenario
"""

dict_ = _aux_get_env(env_name)

key = "env_seeds"
if key not in dict_:
raise RuntimeError(f"Impossible to find the key {key} in the dictionnary. You should re run the score function.")

return dict_[key]


if __name__ == "__main__":
# create the environment
env = grid2op.make(env_name)

# split into train / val / test
# it is such that there are 25 chronics for val and 24 for test
env.seed(1)
env.reset()
nm_train, nm_val, nm_test = env.train_val_split_random(add_for_test="test",
pct_val=4.2,
pct_test=4.2,
deep_copy=deep_copy)

# computes some statistics for val / test to compare performance of
# some agents with the do nothing for example
    max_int = np.iinfo(dt_int).max
for nm_ in [nm_val, nm_test]:
env_tmp = grid2op.make(nm_, backend=LightSimBackend())
nb_scenario = len(env_tmp.chronics_handler.subpaths)
print(f"{nm_}: {nb_scenario}")
my_score = SCOREUSED(env_tmp,
nb_scenario=nb_scenario,
env_seeds=np.random.randint(low=0,
high=max_int,
size=nb_scenario,
dtype=dt_int),
agent_seeds=[0 for _ in range(nb_scenario)],
verbose=verbose,
nb_process_stats=nb_process_stats,
)

# compute statistics for reco powerline
seeds = get_env_seed(nm_)
reco_powerline_agent = RecoPowerlineAgent(env_tmp.action_space)
stats_reco = EpisodeStatistics(env_tmp, name_stats=name_stats)
stats_reco.compute(nb_scenario=nb_scenario,
agent=reco_powerline_agent,
env_seeds=seeds)

if nm_ == nm_val:
# save the normalization parameters from the validation set
dict_ = {"subtract": {}, 'divide': {}}
for attr_nm in ["gen_p", "load_p", "p_or", "rho",
"timestep_overflow", "line_status",
"actual_dispatch", "target_dispatch",
"storage_charge", "storage_power",
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]:
avg_ = stats_reco.get(attr_nm)[0].mean(axis=0)
std_ = stats_reco.get(attr_nm)[0].std(axis=0)
dict_["subtract"][attr_nm] = [float(el) for el in avg_]
dict_["divide"][attr_nm] = [max(float(el), 1.0) for el in std_]

with open("preprocess_obs.json", "w", encoding="utf-8") as f:
json.dump(obj=dict_, fp=f)

act_space_kwargs = {"add": {"redispatch": [0. for gen_id in range(env.n_gen) if env.gen_redispatchable[gen_id]],
"set_storage": [0. for _ in range(env.n_storage)]},
'multiply': {"redispatch": [1. / (max(float(el), 1.0)) for gen_id, el in enumerate(env.gen_max_ramp_up) if env.gen_redispatchable[gen_id]],
"set_storage": [1. / (max(float(el), 1.0)) for el in env.storage_max_p_prod]}
}
with open("preprocess_act.json", "w", encoding="utf-8") as f:
json.dump(obj=act_space_kwargs, fp=f)
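Usage note (not part of the diff): once this script has run, the split environments can be loaded by name, which is what the training and evaluation scripts above rely on.

import grid2op
from lightsim2grid import LightSimBackend

# names created by train_val_split_random(add_for_test="test", ...)
env_train = grid2op.make("l2rpn_idf_2023_train", backend=LightSimBackend())
env_val = grid2op.make("l2rpn_idf_2023_val", backend=LightSimBackend())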
