
Commit 3547e44
Merge pull request #17 from rte-france/bd-dev
Update the baselines for backward compatibility with 0.9.1.post1
BDonnot authored Jun 29, 2020
2 parents 4678182 + a0d2750 commit 3547e44
Showing 15 changed files with 162 additions and 134 deletions.
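
The change repeated across every evaluate.py below is the agent construction API: the observation space is now passed to the constructor, and the post-load call to agent.init_obs_extraction(env) is gone. A minimal sketch of the new calling convention (the environment name is an assumption, and nn_archi / load_path stand in for artefacts of a previously saved baseline):

    import grid2op
    from l2rpn_baselines.DeepQSimple import DeepQSimple

    env = grid2op.make("l2rpn_case14_sandbox")   # any grid2op environment
    agent = DeepQSimple(action_space=env.action_space,
                        observation_space=env.observation_space,  # new in this commit
                        name="DeepQSimple",
                        store_action=True,
                        nn_archi=nn_archi)       # read from the saved nn_architecture.json
    agent.load(load_path)                        # no init_obs_extraction(env) afterwards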
5 changes: 5 additions & 0 deletions .gitignore
@@ -153,3 +153,8 @@ l2rpn_baselines/DuelQLeapNet/working_superwell/
 _doc_built/
 l2rpn_baselines/test/*.txt
 l2rpn_baselines/Multithreading_agent/
+l2rpn_baselines/DeepQSimple/saved_baseline/
+l2rpn_baselines/DuelQLeapNet/logs-eval/
+l2rpn_baselines/DuelQSimple/saved_baseline/
+l2rpn_baselines/SAC/saved_baseline/

8 changes: 4 additions & 4 deletions l2rpn_baselines/DeepQSimple/evaluate.py
@@ -125,13 +125,13 @@ def evaluate(env,
     # Run
     # Create agent
     agent = DeepQSimple(action_space=env.action_space,
-                        name=name,
-                        store_action=nb_process == 1,
-                        nn_archi=nn_archi)
+                        name=name,
+                        store_action=nb_process == 1,
+                        nn_archi=nn_archi,
+                        observation_space=env.observation_space)

     # Load weights from file
     agent.load(load_path)
-    agent.init_obs_extraction(env)

     # Build runner
     runner = Runner(**runner_params,
36 changes: 20 additions & 16 deletions l2rpn_baselines/DeepQSimple/train.py
@@ -238,7 +238,12 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

     # Create grid2op game environement
     env_init = None
-    from grid2op.Chronics import MultifolderWithCache
+    try:
+        from grid2op.Chronics import MultifolderWithCache
+    except:
+        from grid2op.Chronics import MultiFolder
+        MultifolderWithCache = MultiFolder

     game_param = Parameters()
     game_param.NB_TIMESTEP_COOLDOWN_SUB = 2
     game_param.NB_TIMESTEP_COOLDOWN_LINE = 2
@@ -249,20 +254,19 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                chronics_class=MultifolderWithCache
                )
     # env.chronics_handler.set_max_iter(7*288)
-    env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0003)|(0072)|(0057))$", x) is not None)
-    env.chronics_handler.real_data.reset_cache()
+    try:
+        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((03)|(72)|(57))$", x) is not None)
+        env.chronics_handler.real_data.reset()
+    except RuntimeError as exc_:
+        raise exc_
+    except AttributeError as exc_:
+        # not available in all grid2op version
+        pass
     # env.chronics_handler.real_data.
     env_init = env
     if args.nb_env > 1:
-        from grid2op.Environment import MultiEnvironment
-        env = MultiEnvironment(int(args.nb_env), env)
-        # TODO hack i'll fix in 0.9.0
-        env.action_space = env_init.action_space
-        env.observation_space = env_init.observation_space
-        env.fast_forward_chronics = lambda x: None
-        env.chronics_handler = env_init.chronics_handler
-        env.current_obs = env_init.current_obs
-        env.set_ff()
+        from l2rpn_baselines.utils import make_multi_env
+        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

     tp = TrainingParam()

@@ -273,9 +277,9 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
     tp.update_freq = 128

     # limit the number of time steps played per scenarios
-    tp.step_increase_nb_iter = 2
+    tp.step_increase_nb_iter = 100  # None to deactivate it
     tp.min_iter = 10
-    tp.update_nb_iter(2)
+    tp.update_nb_iter = 100  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

     # oversampling hard scenarios
     tp.oversampling_rate = 3
@@ -322,10 +326,10 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
              save_path=args.save_path,
              load_path=args.load_path,
              logs_dir=args.logs_dir,
-             nb_env=args.nb_env,
              training_param=tp,
              kwargs_converters=kwargs_converters,
-             kwargs_archi=kwargs_archi)
+             kwargs_archi=kwargs_archi,
+             verbose=True)
     finally:
         env.close()
         if args.nb_env > 1:
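
Two patterns recur in the train.py scripts of DeepQSimple, DuelQSimple and SAC: an import shim that degrades to MultiFolder on grid2op versions predating MultifolderWithCache, and the make_multi_env helper that replaces the hand-patched MultiEnvironment. A condensed sketch of both (the environment name is an assumption; the sketch also narrows the scripts' bare except: to ImportError, which is what a failed import raises):

    import re
    import grid2op

    try:  # MultifolderWithCache only exists in recent grid2op versions
        from grid2op.Chronics import MultifolderWithCache
    except ImportError:
        from grid2op.Chronics import MultiFolder
        MultifolderWithCache = MultiFolder  # same interface, no caching

    env = grid2op.make("l2rpn_case14_sandbox",
                       chronics_class=MultifolderWithCache)

    try:
        # keep only the chronics whose folder name ends in 03, 72 or 57
        env.chronics_handler.real_data.set_filter(
            lambda x: re.match(".*((03)|(72)|(57))$", x) is not None)
        env.chronics_handler.real_data.reset()
    except AttributeError:
        pass  # set_filter is not available in all grid2op versions

    nb_env = 4  # illustrative value; the scripts take it from args.nb_env
    if nb_env > 1:
        from l2rpn_baselines.utils import make_multi_env
        env = make_multi_env(env_init=env, nb_env=nb_env)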
27 changes: 7 additions & 20 deletions l2rpn_baselines/DuelQLeapNet/DuelQLeapNet.py
@@ -14,23 +14,10 @@


 class DuelQLeapNet(DeepQAgent):
-    def __init__(self,
-                 action_space,
-                 nn_archi,
-                 name="DeepQAgent",
-                 store_action=True,
-                 istraining=False,
-                 nb_env=1,
-                 **kwargs_converters):
-        DeepQAgent.__init__(self,
-                            action_space,
-                            nn_archi,
-                            name=name,
-                            store_action=store_action,
-                            istraining=istraining,
-                            nb_env=nb_env,
-                            **kwargs_converters)
-        self.tau_dim_start = None
-        self.tau_dim_end = None
-        self.add_tau = -1  # remove one to tau to have a vector of 0 and 1 instead of 1 and 2
-        self._tmp_obs = None
+    """
+    Inheriting from :class:`l2rpn_baselines.DeepQAgent` this class implements the particular agent used for the
+    Double Duelling Deep Q network baseline, with the particularity that the Q network is encoded with a leap net.
+    It does nothing in particular.
+    """
+    pass
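
With the custom __init__ gone, DuelQLeapNet is built exactly like the other baselines; the tau-related bookkeeping (tau_dim_start, tau_dim_end, add_tau) now lives with the network description (LeapNet_NNParam carries the tau attribute lists in train.py below). A sketch, with the constructor signature inferred from the evaluate.py diffs in this commit and env / nn_archi / load_path as placeholders:

    agent = DuelQLeapNet(action_space=env.action_space,
                         observation_space=env.observation_space,
                         name="DuelQLeapNet",
                         store_action=True,
                         nn_archi=nn_archi)  # a LeapNet_NNParam loaded from JSON
    agent.load(load_path)                    # weights from a previous training run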
6 changes: 3 additions & 3 deletions l2rpn_baselines/DuelQLeapNet/evaluate.py
@@ -120,7 +120,7 @@ def evaluate(env,
     runner_params = env.get_params_for_runner()
     runner_params["verbose"] = verbose

-    if load_path is None:
+    if load_path is None:
         raise RuntimeError("Cannot evaluate a model if there is nothing to be loaded.")
     path_model, path_target_model = DuelQLeapNet_NN.get_path_model(load_path, name)
     nn_archi = LeapNet_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))
@@ -130,11 +130,11 @@ def evaluate(env,
     agent = DuelQLeapNet(action_space=env.action_space,
                          name=name,
                          store_action=nb_process == 1,
-                         nn_archi=nn_archi)
+                         nn_archi=nn_archi,
+                         observation_space=env.observation_space)

     # Load weights from file
     agent.load(load_path)
-    agent.init_obs_extraction(env)

     # Build runner
     runner = Runner(**runner_params,
28 changes: 15 additions & 13 deletions l2rpn_baselines/DuelQLeapNet/train.py
@@ -317,19 +317,21 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
     # env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0057))$", x) is not None)
     env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0000)|(0003))$", x) is not None)
     env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0000))$", x) is not None)
-    env.chronics_handler.real_data.reset_cache()
+    env.chronics_handler.real_data.reset()
     # env.chronics_handler.real_data.
     env_init = env
     if args.nb_env > 1:
-        from grid2op.Environment import MultiEnvironment
-        env = MultiEnvironment(int(args.nb_env), env)
-        # TODO hack i'll fix in 1.0.0
-        env.action_space = env_init.action_space
-        env.observation_space = env_init.observation_space
-        env.fast_forward_chronics = lambda x: None
-        env.chronics_handler = env_init.chronics_handler
-        env.current_obs = env_init.current_obs
-        env.set_ff()
+        # from grid2op.Environment import MultiEnvironment
+        # env = MultiEnvironment(int(args.nb_env), env)
+        # # TODO hack i'll fix in 1.0.0
+        # env.action_space = env_init.action_space
+        # env.observation_space = env_init.observation_space
+        # env.fast_forward_chronics = lambda x: None
+        # env.chronics_handler = env_init.chronics_handler
+        # env.current_obs = env_init.current_obs
+        # env.set_ff()
+        from l2rpn_baselines.utils import make_multi_env
+        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

     tp = TrainingParam()

@@ -372,10 +374,11 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

     # nn architecture
     li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
-                     "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
-                     "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho", "line_status"]
+                     "actual_dispatch", "target_dispatch"]
     # li_attr_obs_Tau = ["rho", "line_status"]
-    li_attr_obs_Tau = []
+    li_attr_obs_Tau = ["topo_vect", "time_before_cooldown_line", "time_before_cooldown_sub",
+                       "timestep_overflow", "line_status", "rho"]
     sizes = [512, 512, 256, 256]

     x_dim = LeapNet_NNParam.get_obs_size(env_init, li_attr_obs_X)
@@ -399,7 +402,6 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
              save_path=args.save_path,
              load_path=args.load_path,
              logs_dir=args.logs_dir,
-             nb_env=args.nb_env,
              training_param=tp,
              kwargs_converters=kwargs_converters,
              kwargs_archi=kwargs_archi,
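
The substantive change here moves the topology-flavoured observation attributes (topo_vect, the cooldown counters, timestep_overflow, line_status, rho) out of the regular input vector X and into the tau vector. In a leap net, tau is not concatenated with the other features: it multiplicatively modulates the latent encoding of X, which suits discrete, structural inputs such as the grid topology. A schematic of a leap layer (illustration only; the real DuelQLeapNet_NN implementation differs):

    import numpy as np

    def leap_layer(x, tau, E, L, D):
        # E, L, D: encoder, tau-modulation and decoder weight matrices
        h = E @ x              # encode the "regular" inputs (loads, productions, ...)
        h = h + (L @ tau) * h  # tau (topology, cooldowns, rho, ...) rescales the latent state
        return D @ h           # decode toward the Q-value head

    # toy dimensions, purely illustrative
    rng = np.random.default_rng(0)
    x, tau = rng.normal(size=10), rng.integers(0, 2, size=4).astype(float)
    E, L, D = (rng.normal(size=s) for s in [(8, 10), (8, 4), (5, 8)])
    print(leap_layer(x, tau, E, L, D).shape)  # (5,)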
5 changes: 2 additions & 3 deletions l2rpn_baselines/DuelQSimple/evaluate.py
@@ -129,12 +129,11 @@ def evaluate(env,
     agent = DuelQSimple(action_space=env.action_space,
                         name=name,
                         store_action=nb_process == 1,
-                        nn_archi=nn_archi)
+                        nn_archi=nn_archi,
+                        observation_space=env.observation_space)

     # Load weights from file
     agent.load(load_path)
-    agent.init_obs_extraction(env)

     # Build runner
     runner = Runner(**runner_params,
                     agentClass=None,
38 changes: 21 additions & 17 deletions l2rpn_baselines/DuelQSimple/train.py
@@ -237,7 +237,12 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

     # Create grid2op game environement
     env_init = None
-    from grid2op.Chronics import MultifolderWithCache
+    try:
+        from grid2op.Chronics import MultifolderWithCache
+    except:
+        from grid2op.Chronics import MultiFolder
+        MultifolderWithCache = MultiFolder

     game_param = Parameters()
     game_param.NB_TIMESTEP_COOLDOWN_SUB = 2
     game_param.NB_TIMESTEP_COOLDOWN_LINE = 2
@@ -248,20 +253,19 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                chronics_class=MultifolderWithCache
                )
     # env.chronics_handler.set_max_iter(7*288)
-    env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0003)|(0072)|(0057))$", x) is not None)
-    env.chronics_handler.real_data.reset_cache()
+    try:
+        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((03)|(72)|(57))$", x) is not None)
+        env.chronics_handler.real_data.reset()
+    except RuntimeError as exc_:
+        raise exc_
+    except AttributeError as exc_:
+        # not available in all grid2op version
+        pass
     # env.chronics_handler.real_data.
     env_init = env
     if args.nb_env > 1:
-        from grid2op.Environment import MultiEnvironment
-        env = MultiEnvironment(int(args.nb_env), env)
-        # TODO hack i'll fix in 0.9.0
-        env.action_space = env_init.action_space
-        env.observation_space = env_init.observation_space
-        env.fast_forward_chronics = lambda x: None
-        env.chronics_handler = env_init.chronics_handler
-        env.current_obs = env_init.current_obs
-        env.set_ff()
+        from l2rpn_baselines.utils import make_multi_env
+        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

     tp = TrainingParam()

@@ -272,9 +276,9 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
     tp.update_freq = 128

     # limit the number of time steps played per scenarios
-    tp.step_increase_nb_iter = 2
+    tp.step_increase_nb_iter = 100  # None to deactivate it
     tp.min_iter = 10
-    tp.update_nb_iter(2)
+    tp.update_nb_iter = 100  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

     # oversampling hard scenarios
     tp.oversampling_rate = 3
@@ -310,7 +314,7 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
     # which actions i keep
     kwargs_converters = {"all_actions": None,
                          "set_line_status": False,
-                         "change_bus_vect": True,
+                         "change_bus_vect": False,
                          "set_topo_vect": False
                          }
     nm_ = args.name if args.name is not None else DEFAULT_NAME
@@ -321,10 +325,10 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
              save_path=args.save_path,
              load_path=args.load_path,
              logs_dir=args.logs_dir,
-             nb_env=args.nb_env,
              training_param=tp,
              kwargs_converters=kwargs_converters,
-             kwargs_archi=kwargs_archi)
+             kwargs_archi=kwargs_archi,
+             verbose=True)
     finally:
         env.close()
         if args.nb_env > 1:
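
One consequence of shortening the chronics filter from (0003)|(0072)|(0057) to (03)|(72)|(57) is that the regex now keeps every scenario whose name ends in those two digits, not only the three original folders. A quick, runnable check (scenario names are illustrative):

    import re

    keep = lambda name: re.match(".*((03)|(72)|(57))$", name) is not None

    for name in ["Scenario_0003", "Scenario_0072", "Scenario_0103", "Scenario_0004"]:
        print(name, keep(name))
    # Scenario_0003 True   (one of the intended folders)
    # Scenario_0103 True   (newly matched: it also ends in "03")
    # Scenario_0004 False

The other notable flip in this file, change_bus_vect going from True to False, likewise shrinks the set of discrete actions the converter enumerates for the agent.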
4 changes: 2 additions & 2 deletions l2rpn_baselines/SAC/evaluate.py
@@ -125,11 +125,11 @@ def evaluate(env,
     agent = SAC(action_space=env.action_space,
                 name=name,
                 store_action=nb_process == 1,
-                nn_archi=nn_archi)
+                nn_archi=nn_archi,
+                observation_space=env.observation_space)

     # Load weights from file
     agent.load(load_path)
-    agent.init_obs_extraction(env)

     # Print model summary
     stringlist = []
33 changes: 18 additions & 15 deletions l2rpn_baselines/SAC/train.py
@@ -243,7 +243,12 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):

     # Create grid2op game environement
     env_init = None
-    from grid2op.Chronics import MultifolderWithCache
+    try:
+        from grid2op.Chronics import MultifolderWithCache
+    except:
+        from grid2op.Chronics import MultiFolder
+        MultifolderWithCache = MultiFolder

     game_param = Parameters()
     game_param.NB_TIMESTEP_COOLDOWN_SUB = 2
     game_param.NB_TIMESTEP_COOLDOWN_LINE = 2
@@ -254,20 +259,19 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
                chronics_class=MultifolderWithCache
                )
     # env.chronics_handler.set_max_iter(7*288)
-    env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((0003)|(0072)|(0057))$", x) is not None)
-    env.chronics_handler.real_data.reset_cache()
+    try:
+        env.chronics_handler.real_data.set_filter(lambda x: re.match(".*((03)|(72)|(57))$", x) is not None)
+        env.chronics_handler.real_data.reset()
+    except RuntimeError as exc_:
+        raise exc_
+    except AttributeError as exc_:
+        # not available in all grid2op version
+        pass
     # env.chronics_handler.real_data.
     env_init = env
     if args.nb_env > 1:
-        from grid2op.Environment import MultiEnvironment
-        env = MultiEnvironment(int(args.nb_env), env)
-        # TODO hack i'll fix in 0.9.0
-        env.action_space = env_init.action_space
-        env.observation_space = env_init.observation_space
-        env.fast_forward_chronics = lambda x: None
-        env.chronics_handler = env_init.chronics_handler
-        env.current_obs = env_init.current_obs
-        env.set_ff()
+        from l2rpn_baselines.utils import make_multi_env
+        env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

     tp = TrainingParam()

@@ -278,9 +282,9 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
     tp.update_freq = 128

     # limit the number of time steps played per scenarios
-    tp.step_increase_nb_iter = 2
+    tp.step_increase_nb_iter = 100  # None to deactivate it
     tp.min_iter = 10
-    tp.update_nb_iter(2)
+    tp.update_nb_iter = 100  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

     # oversampling hard scenarios
     tp.oversampling_rate = 3
@@ -334,7 +338,6 @@ def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
              save_path=args.save_path,
              load_path=args.load_path,
              logs_dir=args.logs_dir,
-             nb_env=args.nb_env,
              training_param=tp,
              kwargs_converters=kwargs_converters,
              kwargs_archi=kwargs_archi)
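
The TrainingParam changes are identical in DeepQSimple, DuelQSimple and SAC: update_nb_iter becomes a plain attribute instead of a method call, and the per-scenario step budget now grows by 100 rather than 2. A sketch collecting the curriculum settings as they now read (TrainingParam is importable from l2rpn_baselines.utils; the comments paraphrase the ones in the diff):

    from l2rpn_baselines.utils import TrainingParam

    tp = TrainingParam()
    tp.min_iter = 10                 # initial cap on time steps played per scenario
    tp.step_increase_nb_iter = 100   # how much the cap grows; None deactivates it
    tp.update_nb_iter = 100          # grow the cap once 100 scenarios are solved
    tp.oversampling_rate = 3         # hard scenarios are sampled ~3x more often
    tp.update_freq = 128             # update frequency used by the training loop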
1 change: 1 addition & 0 deletions l2rpn_baselines/Template/evaluate.py
@@ -98,6 +98,7 @@ def evaluate(env,
     if save_gif:
         save_log_gif(logs_path, res)

+
 if __name__ == "__main__":
     """
     This is a possible implementation of the eval script.
(The remaining 4 of the 15 changed files were not loaded in this view.)
