Skip to content

Commit

Permalink
bugfix(#310): Model initialization
Browse files Browse the repository at this point in the history
- Fix several small bugs in initializing and using models in stages 4-5.
  • Loading branch information
jharwell committed Aug 26, 2022
1 parent 6fb94fa commit 6739498
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 40 deletions.
4 changes: 2 additions & 2 deletions sierra/core/graphs/heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,9 @@ def generate(self) -> None:
ax2.yaxis.set_ticks_position('left')

if self.legend is not None:
ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 40)),
ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 20)),
size=self.text_size['legend_label'])
ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 40)),
ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 20)),
size=self.text_size['legend_label'])

# Add colorbar.
Expand Down
2 changes: 1 addition & 1 deletion sierra/core/graphs/stacked_surface_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def generate(self) -> None:

if not dfs or len(dfs) > StackedSurfaceGraph.kMaxSurfaces:
self.logger.debug(("Not generating stacked surface graph: wrong # "
"files (must be > 0, <= %s"),
"files (must be > 0, <= %s)"),
StackedSurfaceGraph.kMaxSurfaces)
return

Expand Down
47 changes: 27 additions & 20 deletions sierra/core/graphs/summary_line_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,28 +345,35 @@ def _read_bw_stats(self) -> tp.Dict[str, tp.List[pd.DataFrame]]:
return dfs

def _read_models(self) -> tp.Tuple[pd.DataFrame, tp.List[str]]:
if self.model_root is not None:
self.logger.trace("Model root='%s',stem='%s'", # type: ignore
self.model_root,
self.input_stem)

exts = config.kModelsExt
model = self.model_root / (self.input_stem + exts['model'])
legend = self.model_root / (self.input_stem + exts['legend'])

if utils.path_exists(model):
model = storage.DataFrameReader('storage.csv')(model)
if utils.path_exists(legend):
with utils.utf8open(legend, 'r') as f:
model_legend = f.read().splitlines()
else:
self.logger.warning("No legend file for model '%s' found",
model)
model_legend = ['Model Prediction']
if self.model_root is None:
return (None, [])

self.logger.trace("Model root='%s'", # type: ignore
self.model_root)

exts = config.kModelsExt
modelf = self.model_root / (self.input_stem + exts['model'])
legendf = self.model_root / (self.input_stem + exts['legend'])

if not utils.path_exists(modelf):
self.logger.trace("No model='%s' found in model root", # type: ignore
modelf)
return (None, [])

model = storage.DataFrameReader('storage.csv')(modelf)
if utils.path_exists(legendf):
with utils.utf8open(legendf, 'r') as f:
legend = f.read().splitlines()
else:
self.logger.warning("No legend file for model '%s' found",
modelf)
legend = ['Model Prediction']

return (model, model_legend)
self.logger.trace("Loaded model='%s',legend='%s'", # type: ignore
modelf.relative_to(self.model_root),
legendf.relative_to(self.model_root))

return (None, [])
return (model, legend)


__api__ = [
Expand Down
2 changes: 1 addition & 1 deletion sierra/core/pipeline/stage3/run_collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __call__(self, criteria: bc.IConcreteBatchCriteria) -> None:
@staticmethod
def _gather_worker(gatherq: mp.Queue,
processq: mp.Queue,
main_config: dict,
main_config: types.YAMLDict,
project: str,
storage_medium: str) -> None:
module = pm.module_load_tiered(project=project,
Expand Down
4 changes: 2 additions & 2 deletions sierra/core/pipeline/stage4/graph_collator.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def __call__(self,
criteria,
target: dict,
stat_collate_root: pathlib.Path) -> None:
self.logger.info("Stage4: Collating univariate files from batch in %s for graph '%s'...",
self.logger.info("Univariate files from batch in %s for graph '%s'...",
self.cmdopts['batch_output_root'],
target['src_stem'])
self.logger.trace(json.dumps(target, indent=4)) # type: ignore
Expand Down Expand Up @@ -165,7 +165,7 @@ def __call__(self,
criteria: bc.IConcreteBatchCriteria,
target: dict,
stat_collate_root: pathlib.Path) -> None:
self.logger.info("Stage4: Collating bivariate files from batch in %s for graph '%s'...",
self.logger.info("Bivariate files from batch in %s for graph '%s'...",
self.cmdopts['batch_output_root'],
target['src_stem'])
self.logger.trace(json.dumps(target, indent=4)) # type: ignore
Expand Down
2 changes: 1 addition & 1 deletion sierra/core/pipeline/stage4/pipeline_stage4.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _load_models(self) -> None:
self.models_config = yaml.load(utils.utf8open(project_models),
yaml.FullLoader)
pm.models.initialize(self.cmdopts['project'],
self.cmdopts['project_model_root'])
pathlib.Path(self.cmdopts['project_model_root']))

# All models present in the .yaml file are enabled/set to run
# unconditionally
Expand Down
26 changes: 17 additions & 9 deletions sierra/core/pipeline/stage5/inter_scenario_comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,6 @@ def _gen_csvs(self,
self.controller)
return

model_ipath_stem = pathlib.Path(cmdopts['batch_model_root'], src_stem)
model_opath_stem = pathlib.Path(self.sc_model_root,
dest_stem + "-" + self.controller)

opath_stem = pathlib.Path(self.sc_csv_root,
dest_stem + "-" + self.controller)
writer = storage.DataFrameWriter('storage.csv')
Expand All @@ -284,11 +280,19 @@ def _gen_csvs(self,

# Collect performance results models and legends. Append to existing
# dataframes if they exist, otherwise start new ones.
model_opath = model_opath_stem.with_suffix(config.kModelsExt['model'])
model_ipath = model_ipath_stem.with_suffix(config.kModelsExt['model'])
legend_opath = model_opath_stem.with_suffix(config.kModelsExt['legend'])

# Can't use with_suffix() for opath, because that path contains the
# controller, which already has a '.' in it.
model_istem = pathlib.Path(cmdopts['batch_model_root'], src_stem)
model_ostem = pathlib.Path(self.sc_model_root,
dest_stem + "-" + self.controller)

model_ipath = model_istem.with_suffix(config.kModelsExt['model'])
model_opath = model_ostem.with_name(
model_ostem.name + config.kModelsExt['model'])
model_df = self._accum_df(model_ipath, model_opath, src_stem)
legend_opath = model_ostem.with_name(
model_ostem.name + config.kModelsExt['legend'])

if model_df is not None:
writer(model_df, model_opath, index=False)

Expand Down Expand Up @@ -323,7 +327,11 @@ def _accum_df(self,
len(t.index))
self.logger.warning("Truncating '%s.csv' to last row", src_stem)

cum_df = cum_df.append(t.loc[t.index[-1], t.columns.to_list()])
# Series are columns, so we have to transpose before concatenating
cum_df = pd.concat([cum_df,
t.loc[t.index[-1], :].to_frame().T])

# cum_df = cum_df.append(t.loc[t.index[-1], t.columns.to_list()])
return cum_df

return None
Expand Down
2 changes: 1 addition & 1 deletion sierra/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@

# Project packages

__version__ = "1.2.20"
__version__ = "1.2.21"
15 changes: 12 additions & 3 deletions todo.org
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,20 @@
- [ ] Switch from bash to something else (python/noxfile?); my scripts are
getting too complicated.

** TODO Make bivariate inter-experiment heatmaps renderable into videos

** TODO Add quick start: a linux VM with SIERRA all set up and ready to go
** TODO Add additional unit tests [0%]
- [ ] Parser unit tests (for CLI variables that can be used as batch criteria)
- [ ] Nodefile parsing

** TODO Better tasking framework/organization
- Switch to calling nox instead of calling bash directly from the yml workflow
files. This will allow me to easily parameterize multiple versions of python,
AND reduce the # of jobs I need/total workflow runtime on github by a good bit.

** TODO Switch from cmdopts dictionary to a data class with fields--I will get
WAY better help from the static analyzer

** TODO Add quick start: a linux VM with SIERRA all set up and ready to go

** TODO Add tutorial for creating a new model
- And add an example in the sample project

Expand Down Expand Up @@ -152,3 +159,5 @@ This really should be there.... And not require a valid --project to be passed.
** DONE Switch everything in examples.rst to use the sample project to increase
CLOSED: [2022-08-16 Tue 15:32]
cohesion
** DONE Make bivariate inter-experiment heatmaps renderable into videos
CLOSED: [2022-08-25 Thu 12:56]

0 comments on commit 6739498

Please sign in to comment.