Merge pull request #57 from rte-france/bd-dev
version 0.8.0
BDonnot authored Jul 24, 2023
2 parents fd742a9 + 65ff960 commit b5d9f41
Showing 37 changed files with 754 additions and 71 deletions.
5 changes: 5 additions & 0 deletions .gitignore
@@ -1,5 +1,6 @@
# Byte-compiled / optimized / DLL files
__pycache__/
**__pycache__/
*.py[cod]
*$py.class

@@ -195,3 +196,7 @@ examples/ppo_stable_baselines/preprocess_act.json
examples/ppo_stable_baselines/preprocess_obs.json
l2rpn_baselines/test/END_RESULT_DATAFRAME.csv
l2rpn_baselines/test/logs-eval/
examples/ppo_stable_baselines/saved_model_2023/FirstAgent/FirstAgent_*_steps.zip
examples/ppo_stable_baselines_idf_2023/saved_model_2023/FirstAgent/FirstAgent_*_steps.zip
test_cmd/
test_rho_select_scen.py
11 changes: 11 additions & 0 deletions CHANGELOG.rst
@@ -15,6 +15,17 @@ Change Log
- train somewhere a working baseline (that does better than do nothing)
- show an example of a baseline that uses a GNN

[0.8.0] - 2023-07-24
-------------------------
- [BREAKING] remove support for gym, use gymnasium instead (if you still want
  gym, it should be fine to install `l2rpn-baselines` with `pip install l2rpn_baselines --no-deps`
  and to install gym separately, but we do not recommend doing so)
- [BREAKING] change the signature of the `GymEnvWithHeuristics.fix_action` method
- [FIXED] a "bug" related to the gymnasium / gym handling in grid2op
- [FIXED] the way the learning rate is retrieved from Adam (in keras)
- [FIXED] `PPO_RLLIB` with the new versions of ray, rllib, gymnasium, etc.
- [ADDED] an example for training a model on the "l2rpn_idf_2023" environment

[0.7.0] - 2023-07-13
------------------------
- [ADDED] the "topo oracle agent" (contrib)
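A note on the gymnasium migration listed above: the sketch below is illustrative only (not part of this commit) and shows what downstream code looks like once gym is replaced by gymnasium; the environment name is an arbitrary example and `GymEnv` is grid2op's `gym_compat` wrapper.

import grid2op
from grid2op.gym_compat import GymEnv  # behaves as a gymnasium.Env when gymnasium is installed

env = grid2op.make("l2rpn_case14_sandbox")  # any grid2op environment
gym_env = GymEnv(env)

obs, info = gym_env.reset()  # gymnasium API: reset returns (obs, info)
# gymnasium API: step returns (obs, reward, terminated, truncated, info)
obs, reward, terminated, truncated, info = gym_env.step(gym_env.action_space.sample())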
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -22,8 +22,8 @@
author = 'Benjamin DONNOT'

# The full version, including alpha/beta/rc tags
release = '0.7.0'
version = '0.7'
release = '0.8.0'
version = '0.8'

# -- General configuration ---------------------------------------------------

13 changes: 11 additions & 2 deletions examples/ppo_stable_baselines/A_prep_env.py
@@ -14,7 +14,7 @@
import grid2op
from grid2op.dtypes import dt_int
from grid2op.Agent import RecoPowerlineAgent
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021, ScoreL2RPN2023
from lightsim2grid import LightSimBackend
import numpy as np

@@ -23,6 +23,10 @@
env_name = "l2rpn_wcci_2022"
SCOREUSED = ScoreL2RPN2022 # ScoreICAPS2021

env_name = "l2rpn_idf_2023"
SCOREUSED = ScoreL2RPN2023 # ScoreICAPS2021


name_stats = "_reco_powerline"
nb_process_stats = 4 if not is_windows else 1
# if you still want to use multiprocessing on windows
@@ -137,7 +141,12 @@ def get_env_seed(env_name: str):
if nm_ == nm_val:
# save the normalization parameters from the validation set
dict_ = {"subtract": {}, 'divide': {}}
for attr_nm in ["gen_p", "load_p", "p_or", "rho"]:
for attr_nm in ["gen_p", "load_p", "p_or", "rho",
"timestep_overflow", "line_status",
"actual_dispatch", "target_dispatch",
"storage_charge", "storage_power",
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]:
avg_ = stats_reco.get(attr_nm)[0].mean(axis=0)
std_ = stats_reco.get(attr_nm)[0].std(axis=0)
dict_["subtract"][attr_nm] = [float(el) for el in avg_]
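For context on the hunk above: the `subtract` / `divide` statistics written to `preprocess_obs.json` standardize each kept observation attribute. A hedged sketch of how such a file can be consumed, assuming `env` and `gym_env` are built as in the training script (`BoxGymObsSpace` from grid2op's `gym_compat` accepts `subtract` / `divide` keyword arguments):

import json
from grid2op.gym_compat import BoxGymObsSpace

with open("preprocess_obs.json", "r", encoding="utf-8") as f:
    obs_space_kwargs = json.load(f)  # {"subtract": {...}, "divide": {...}}

# each kept attribute is shifted by its mean and divided by its standard
# deviation, both computed on the validation-set statistics above
gym_env.observation_space = BoxGymObsSpace(env.observation_space,
                                           attr_to_keep=["gen_p", "load_p", "p_or", "rho"],
                                           **obs_space_kwargs)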
14 changes: 8 additions & 6 deletions examples/ppo_stable_baselines/B_train_agent.py
@@ -21,7 +21,9 @@
from l2rpn_baselines.utils import GymEnvWithReco, GymEnvWithRecoWithDN

env_name = "l2rpn_wcci_2022_train"
save_path = "./saved_model"

env_name = "l2rpn_idf_2023_train"
save_path = "./saved_model_2023"
name = "FirstAgent"
gymenv_class = GymEnvWithRecoWithDN # uses the heuristics to do nothing if the grid is not at risk and to reconnect powerlines automatically
max_iter = 7 * 24 * 12 # None to deactivate it
@@ -124,7 +126,6 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
# curtailment part of the observation
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]
TODO = ...
# same here you can change it as you please
act_attr_to_keep = ["redispatch", "curtail", "set_storage"]
# parameters for the learning
@@ -156,15 +157,16 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
param.LIMIT_INFEASIBLE_CURTAILMENT_STORAGE_ACTION = True
env.change_parameters(param)

if max_iter is not None:
env.set_max_iter(max_iter) # one week
obs = env.reset()
    # train on the whole month of February, why not?
env.chronics_handler.real_data.set_filter(lambda x: re.match(r".*2050-02-.*$", x) is not None)
env.chronics_handler.real_data.set_filter(lambda x: re.match(r".*2035-02-.*$", x) is not None)
env.chronics_handler.real_data.reset()
# see https://grid2op.readthedocs.io/en/latest/environment.html#optimize-the-data-pipeline
# for more information !

if max_iter is not None:
env.set_max_iter(max_iter) # one week
obs = env.reset()

print("environment loaded !")
trained_agent = train(
env,
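The `train(...)` call truncated above is where the files written by `A_prep_env.py` enter the pipeline. A sketch of the glue, under the assumption that `train` is `l2rpn_baselines.PPO_SB3.train` and that it forwards `obs_space_kwargs` / `act_space_kwargs` to the gym spaces (as this example does; parameter names may vary across versions):

import json
from l2rpn_baselines.PPO_SB3 import train

with open("preprocess_obs.json", "r", encoding="utf-8") as f:
    obs_space_kwargs = json.load(f)  # normalization computed by A_prep_env.py
with open("preprocess_act.json", "r", encoding="utf-8") as f:
    act_space_kwargs = json.load(f)  # scaling for redispatch / set_storage

trained_agent = train(env,
                      name=name,
                      save_path=save_path,
                      gymenv_class=gymenv_class,
                      obs_attr_to_keep=obs_attr_to_keep,
                      act_attr_to_keep=act_attr_to_keep,
                      obs_space_kwargs=obs_space_kwargs,
                      act_space_kwargs=act_space_kwargs)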
17 changes: 10 additions & 7 deletions examples/ppo_stable_baselines/C_evaluate_trained_model.py
@@ -10,7 +10,7 @@
import numpy as np

import grid2op
from grid2op.utils import ScoreL2RPN2022
from grid2op.utils import ScoreL2RPN2023
from grid2op.Agent import RecoPowerlineAgent

from lightsim2grid import LightSimBackend
@@ -23,13 +23,15 @@
# and use a different parameter for evaluation than the one used for
# training.

env_name = "l2rpn_wcci_2022_val"
SCOREUSED = ScoreL2RPN2022
# env_name = "l2rpn_wcci_2022_val"
# SCOREUSED = ScoreL2RPN2022
env_name = "l2rpn_idf_2023_val"
SCOREUSED = ScoreL2RPN2023

agent_name = name
nb_scenario = 10
nb_process_stats = 1
load_path = "./saved_model"
load_path = "./saved_model_2023"
iter_num = None # put None for the latest version
verbose = True

@@ -61,6 +63,7 @@ def get_ts_survived_dn(env_name):
res -= 1 # the first observation (after reset) is counted as a step in the runner
return res


def get_ts_survived_reco(env_name):
dict_ = _aux_get_env(env_name, name_stat=name_stats)
res = []
@@ -103,7 +106,7 @@ def get_ts_survived_reco(env_name):
scores_r, n_played_r, total_ts_r = my_score.get(RecoPowerlineAgent(env_val.action_space))
scores, n_played, total_ts = my_score.get(my_agent)

res_scores = {"scores": [float(score) for score in scores],
res_scores = {"scores": [float(score[0]) for score in scores],
"n_played": [int(el) for el in n_played],
"total_ts": [int(el) for el in total_ts]}

@@ -112,7 +115,7 @@ def get_ts_survived_reco(env_name):
for score, my_ts, dn_ts in zip(scores, n_played, dn_ts_survived):
print(f"\t{':-)' if my_ts >= dn_ts else ':-('}:"
f"\n\t\t- I survived {my_ts} steps vs {dn_ts} for do nothing ({my_ts - dn_ts})"
f"\n\t\t- my score is {score:.2f} (do nothing is 0.)")
f"\n\t\t- my score is {score[0]:.2f} (do nothing is 15.)")
best_than_dn += my_ts >= dn_ts
print(f"The agent \"{agent_name}\" beats \"do nothing\" baseline in {best_than_dn} out of {len(dn_ts_survived)} episodes")

@@ -121,6 +124,6 @@ def get_ts_survived_reco(env_name):
for score, my_ts, reco_ts, score_ in zip(scores, n_played, reco_ts_survived, scores_r):
print(f"\t{':-)' if my_ts >= reco_ts else ':-('}:"
f"\n\t\t- I survived {my_ts} steps vs {reco_ts} for reco powerline ({my_ts - reco_ts})"
f"\n\t\t- my score is {score:.2f} (reco powerline: {score_:.2f})")
f"\n\t\t- my score is {score[0]:.2f} (reco powerline: {score_[0]:.2f})")
best_than_reco += my_ts >= reco_ts
print(f"The agent \"{agent_name}\" beats \"reco powerline\" baseline in {best_than_reco} out of {len(reco_ts_survived)} episodes")
@@ -0,0 +1,2 @@
I have encoded the action space !
DO NOT MODIFY !
@@ -0,0 +1,2 @@
I have encoded the observation space !
DO NOT MODIFY !
Binary file not shown.
@@ -0,0 +1 @@
["redispatch", "curtail", "set_storage"]
@@ -0,0 +1 @@
["month", "day_of_week", "hour_of_day", "minute_of_hour", "gen_p", "load_p", "p_or", "rho", "timestep_overflow", "line_status", "actual_dispatch", "target_dispatch", "storage_charge", "storage_power", "curtailment", "curtailment_limit", "gen_p_before_curtail"]
161 changes: 161 additions & 0 deletions examples/ppo_stable_baselines_idf_2023/A_prep_env.py
@@ -0,0 +1,161 @@
# Copyright (c) 2020-2022, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.

# this file needs to be run only once, it might take a while !
# we show it here because we want to include the "full stack"

import os
import json
import sys
import numpy as np
import grid2op
from grid2op.dtypes import dt_int
from grid2op.Agent import RecoPowerlineAgent
from grid2op.utils import EpisodeStatistics, ScoreL2RPN2022, ScoreICAPS2021, ScoreL2RPN2023
from lightsim2grid import LightSimBackend

is_windows = sys.platform.startswith("win32")

env_name = "l2rpn_idf_2023"
SCOREUSED = ScoreL2RPN2023

name_stats = "_reco_powerline"
nb_process_stats = 4 if not is_windows else 1
# if you still want to use multiprocessing on windows
# have a look at the `env.generate_classes()` function
verbose = 1
deep_copy = is_windows # force the deep copy on windows (due to permission issue in symlink in windows)


def _aux_get_env(env_name, dn=True, name_stat=None):
path_ = grid2op.get_current_local_dir()
path_env = os.path.join(path_, env_name)
if not os.path.exists(path_env):
raise RuntimeError(f"The environment \"{env_name}\" does not exist.")

if SCOREUSED == ScoreICAPS2021:
path_dn = os.path.join(path_env, "_statistics_icaps2021_dn")
else:
path_dn = os.path.join(path_env, "_statistics_l2rpn_dn")

if not os.path.exists(path_dn):
raise RuntimeError("The folder _statistics_icaps2021_dn (or _statistics_l2rpn_dn) used for computing the score do not exist")
path_reco = os.path.join(path_env, "_statistics_l2rpn_no_overflow_reco")
if not os.path.exists(path_reco):
raise RuntimeError("The folder _statistics_l2rpn_no_overflow_reco used for computing the score do not exist")

if name_stat is None:
if dn:
path_metadata = os.path.join(path_dn, "metadata.json")
else:
path_metadata = os.path.join(path_reco, "metadata.json")
else:
path_stat = os.path.join(path_env, EpisodeStatistics.get_name_dir(name_stat))
if not os.path.exists(path_stat):
raise RuntimeError(f"No folder associated with statistics {name_stat}")
path_metadata = os.path.join(path_stat, "metadata.json")

if not os.path.exists(path_metadata):
raise RuntimeError("No metadata can be found for the statistics you wanted to compute.")

with open(path_metadata, "r", encoding="utf-8") as f:
dict_ = json.load(f)

return dict_


def get_env_seed(env_name: str):
"""This function ensures that you can reproduce the results of the computed scenarios.
    It forces the seeds of the environment, during evaluation, to be the same as the ones used when computing the score.
As environments are stochastic in grid2op, it is very important that you use this function (or a similar one) before
computing the scores of your agent.
Args:
env_name (str): The environment name on which you want to retrieve the seeds used
Raises:
RuntimeError: When it is not possible to retrieve the seeds (for example when the "statistics" has not been computed)
Returns:
        list: the environment seeds used for each scenario
"""

dict_ = _aux_get_env(env_name)

key = "env_seeds"
if key not in dict_:
raise RuntimeError(f"Impossible to find the key {key} in the dictionnary. You should re run the score function.")

return dict_[key]


if __name__ == "__main__":
# create the environment
env = grid2op.make(env_name)

# split into train / val / test
# it is such that there are 25 chronics for val and 24 for test
env.seed(1)
env.reset()
nm_train, nm_val, nm_test = env.train_val_split_random(add_for_test="test",
pct_val=4.2,
pct_test=4.2,
deep_copy=deep_copy)

# computes some statistics for val / test to compare performance of
# some agents with the do nothing for example
    max_int = np.iinfo(dt_int).max
for nm_ in [nm_val, nm_test]:
env_tmp = grid2op.make(nm_, backend=LightSimBackend())
nb_scenario = len(env_tmp.chronics_handler.subpaths)
print(f"{nm_}: {nb_scenario}")
my_score = SCOREUSED(env_tmp,
nb_scenario=nb_scenario,
env_seeds=np.random.randint(low=0,
high=max_int,
size=nb_scenario,
dtype=dt_int),
agent_seeds=[0 for _ in range(nb_scenario)],
verbose=verbose,
nb_process_stats=nb_process_stats,
)

# compute statistics for reco powerline
seeds = get_env_seed(nm_)
reco_powerline_agent = RecoPowerlineAgent(env_tmp.action_space)
stats_reco = EpisodeStatistics(env_tmp, name_stats=name_stats)
stats_reco.compute(nb_scenario=nb_scenario,
agent=reco_powerline_agent,
env_seeds=seeds)

if nm_ == nm_val:
# save the normalization parameters from the validation set
dict_ = {"subtract": {}, 'divide': {}}
for attr_nm in ["gen_p", "load_p", "p_or", "rho",
"timestep_overflow", "line_status",
"actual_dispatch", "target_dispatch",
"storage_charge", "storage_power",
"curtailment", "curtailment_limit", "gen_p_before_curtail",
]:
avg_ = stats_reco.get(attr_nm)[0].mean(axis=0)
std_ = stats_reco.get(attr_nm)[0].std(axis=0)
dict_["subtract"][attr_nm] = [float(el) for el in avg_]
dict_["divide"][attr_nm] = [max(float(el), 1.0) for el in std_]

with open("preprocess_obs.json", "w", encoding="utf-8") as f:
json.dump(obj=dict_, fp=f)

act_space_kwargs = {"add": {"redispatch": [0. for gen_id in range(env.n_gen) if env.gen_redispatchable[gen_id]],
"set_storage": [0. for _ in range(env.n_storage)]},
'multiply': {"redispatch": [1. / (max(float(el), 1.0)) for gen_id, el in enumerate(env.gen_max_ramp_up) if env.gen_redispatchable[gen_id]],
"set_storage": [1. / (max(float(el), 1.0)) for el in env.storage_max_p_prod]}
}
with open("preprocess_act.json", "w", encoding="utf-8") as f:
json.dump(obj=act_space_kwargs, fp=f)
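Usage note (not part of the diff): once this script has run, the split environments can be loaded by name, which is what the training and evaluation scripts above rely on.

import grid2op
from lightsim2grid import LightSimBackend

# names created by train_val_split_random(add_for_test="test", ...)
env_train = grid2op.make("l2rpn_idf_2023_train", backend=LightSimBackend())
env_val = grid2op.make("l2rpn_idf_2023_val", backend=LightSimBackend())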
