bugfix(#310): Model initialization

- Fix several small bugs in initializing and using models in stages 4-5.
jharwell · Aug 26, 2022 · 6739498 · 6739498
1 parent 6fb94fa
commit 6739498
Show file tree

Hide file tree

Showing 9 changed files with 64 additions and 40 deletions.
diff --git a/sierra/core/graphs/heatmap.py b/sierra/core/graphs/heatmap.py
@@ -252,9 +252,9 @@ def generate(self) -> None:
         ax2.yaxis.set_ticks_position('left')
 
         if self.legend is not None:
-            ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 40)),
+            ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 20)),
                           size=self.text_size['legend_label'])
-            ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 40)),
+            ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 20)),
                           size=self.text_size['legend_label'])
 
         # Add colorbar.

diff --git a/sierra/core/graphs/stacked_surface_graph.py b/sierra/core/graphs/stacked_surface_graph.py
@@ -82,7 +82,7 @@ def generate(self) -> None:
 
         if not dfs or len(dfs) > StackedSurfaceGraph.kMaxSurfaces:
             self.logger.debug(("Not generating stacked surface graph: wrong # "
-                               "files (must be > 0, <= %s"),
+                               "files (must be > 0, <= %s)"),
                               StackedSurfaceGraph.kMaxSurfaces)
             return
 

diff --git a/sierra/core/graphs/summary_line_graph.py b/sierra/core/graphs/summary_line_graph.py
@@ -345,28 +345,35 @@ def _read_bw_stats(self) -> tp.Dict[str, tp.List[pd.DataFrame]]:
         return dfs
 
     def _read_models(self) -> tp.Tuple[pd.DataFrame, tp.List[str]]:
-        if self.model_root is not None:
-            self.logger.trace("Model root='%s',stem='%s'",   # type: ignore
-                              self.model_root,
-                              self.input_stem)
-
-            exts = config.kModelsExt
-            model = self.model_root / (self.input_stem + exts['model'])
-            legend = self.model_root / (self.input_stem + exts['legend'])
-
-            if utils.path_exists(model):
-                model = storage.DataFrameReader('storage.csv')(model)
-                if utils.path_exists(legend):
-                    with utils.utf8open(legend, 'r') as f:
-                        model_legend = f.read().splitlines()
-                else:
-                    self.logger.warning("No legend file for model '%s' found",
-                                        model)
-                    model_legend = ['Model Prediction']
+        if self.model_root is None:
+            return (None, [])
+
+        self.logger.trace("Model root='%s'",   # type: ignore
+                          self.model_root)
+
+        exts = config.kModelsExt
+        modelf = self.model_root / (self.input_stem + exts['model'])
+        legendf = self.model_root / (self.input_stem + exts['legend'])
+
+        if not utils.path_exists(modelf):
+            self.logger.trace("No model='%s' found in model root",  # type: ignore
+                              modelf)
+            return (None, [])
+
+        model = storage.DataFrameReader('storage.csv')(modelf)
+        if utils.path_exists(legendf):
+            with utils.utf8open(legendf, 'r') as f:
+                legend = f.read().splitlines()
+        else:
+            self.logger.warning("No legend file for model '%s' found",
+                                modelf)
+            legend = ['Model Prediction']
 
-                return (model, model_legend)
+        self.logger.trace("Loaded model='%s',legend='%s'",  # type: ignore
+                          modelf.relative_to(self.model_root),
+                          legendf.relative_to(self.model_root))
 
-        return (None, [])
+        return (model, legend)
 
 
 __api__ = [

diff --git a/sierra/core/pipeline/stage3/run_collator.py b/sierra/core/pipeline/stage3/run_collator.py
@@ -122,7 +122,7 @@ def __call__(self, criteria: bc.IConcreteBatchCriteria) -> None:
     @staticmethod
     def _gather_worker(gatherq: mp.Queue,
                        processq: mp.Queue,
-                       main_config: dict,
+                       main_config: types.YAMLDict,
                        project: str,
                        storage_medium: str) -> None:
         module = pm.module_load_tiered(project=project,

diff --git a/sierra/core/pipeline/stage4/graph_collator.py b/sierra/core/pipeline/stage4/graph_collator.py
@@ -81,7 +81,7 @@ def __call__(self,
                  criteria,
                  target: dict,
                  stat_collate_root: pathlib.Path) -> None:
-        self.logger.info("Stage4: Collating univariate files from batch in %s for graph '%s'...",
+        self.logger.info("Univariate files from batch in %s for graph '%s'...",
                          self.cmdopts['batch_output_root'],
                          target['src_stem'])
         self.logger.trace(json.dumps(target, indent=4))   # type: ignore
@@ -165,7 +165,7 @@ def __call__(self,
                  criteria: bc.IConcreteBatchCriteria,
                  target: dict,
                  stat_collate_root: pathlib.Path) -> None:
-        self.logger.info("Stage4: Collating bivariate files from batch in %s for graph '%s'...",
+        self.logger.info("Bivariate files from batch in %s for graph '%s'...",
                          self.cmdopts['batch_output_root'],
                          target['src_stem'])
         self.logger.trace(json.dumps(target, indent=4))   # type: ignore

diff --git a/sierra/core/pipeline/stage4/pipeline_stage4.py b/sierra/core/pipeline/stage4/pipeline_stage4.py
@@ -205,7 +205,7 @@ def _load_models(self) -> None:
         self.models_config = yaml.load(utils.utf8open(project_models),
                                        yaml.FullLoader)
         pm.models.initialize(self.cmdopts['project'],
-                             self.cmdopts['project_model_root'])
+                             pathlib.Path(self.cmdopts['project_model_root']))
 
         # All models present in the .yaml file are enabled/set to run
         # unconditionally

diff --git a/sierra/core/pipeline/stage5/inter_scenario_comparator.py b/sierra/core/pipeline/stage5/inter_scenario_comparator.py
@@ -259,10 +259,6 @@ def _gen_csvs(self,
                                 self.controller)
             return
 
-        model_ipath_stem = pathlib.Path(cmdopts['batch_model_root'], src_stem)
-        model_opath_stem = pathlib.Path(self.sc_model_root,
-                                        dest_stem + "-" + self.controller)
-
         opath_stem = pathlib.Path(self.sc_csv_root,
                                   dest_stem + "-" + self.controller)
         writer = storage.DataFrameWriter('storage.csv')
@@ -284,11 +280,19 @@ def _gen_csvs(self,
 
         # Collect performance results models and legends. Append to existing
         # dataframes if they exist, otherwise start new ones.
-        model_opath = model_opath_stem.with_suffix(config.kModelsExt['model'])
-        model_ipath = model_ipath_stem.with_suffix(config.kModelsExt['model'])
-        legend_opath = model_opath_stem.with_suffix(config.kModelsExt['legend'])
-
+        # Can't use with_suffix() for opath, because that path contains the
+        # controller, which already has a '.' in it.
+        model_istem = pathlib.Path(cmdopts['batch_model_root'], src_stem)
+        model_ostem = pathlib.Path(self.sc_model_root,
+                                   dest_stem + "-" + self.controller)
+
+        model_ipath = model_istem.with_suffix(config.kModelsExt['model'])
+        model_opath = model_ostem.with_name(
+            model_ostem.name + config.kModelsExt['model'])
         model_df = self._accum_df(model_ipath, model_opath, src_stem)
+        legend_opath = model_ostem.with_name(
+            model_ostem.name + config.kModelsExt['legend'])
+
         if model_df is not None:
             writer(model_df, model_opath, index=False)
 
@@ -323,7 +327,11 @@ def _accum_df(self,
                                     len(t.index))
                 self.logger.warning("Truncating '%s.csv' to last row", src_stem)
 
-            cum_df = cum_df.append(t.loc[t.index[-1], t.columns.to_list()])
+            # to_frame() yields one column, so transpose it into a row first
+            cum_df = pd.concat([cum_df,
+                                t.loc[t.index[-1], :].to_frame().T])
+
+            # cum_df = cum_df.append(t.loc[t.index[-1], t.columns.to_list()])
             return cum_df
 
         return None

diff --git a/sierra/version.py b/sierra/version.py
@@ -20,4 +20,4 @@
 
 # Project packages
 
-__version__ = "1.2.20"
+__version__ = "1.2.21"
diff --git a/todo.org b/todo.org
@@ -18,13 +18,20 @@
 - [ ] Switch from bash to something else (python/noxfile?); my scripts are
   getting too complicated.
 
-** TODO Make bivariate inter-experiment heatmaps renderable into videos
-
-** TODO Add quick quick start: a linux VM with SIERRA all set up and ready to go
 ** TODO Add additional unit tests [0%]
 - [ ] Parser unit tests (for CLI variables that can be used as batch criteria)
 - [ ] Nodefile parsing
 
+** TODO Better tasking framework/organization
+- Switch to calling nox instead of calling bash directly from the yml workflow
+  files. This will allow me to easily parameterize multiple versions of Python,
+  AND reduce the # of jobs I need/total workflow runtime on GitHub by a good bit.
+
+** TODO Switch from cmdopts dictionary to a data class with fields
+- I will get WAY better help from the static analyzer.
+
+** TODO Add quick quick start: a linux VM with SIERRA all set up and ready to go
+
 ** TODO Added tutorial for creating a new model
 - And add an example in the sample project
 
@@ -152,3 +159,5 @@ This really should be there.... And not require a valid --project to be passed.
 ** DONE Switch everything in examples.rst to use the sample project to increase
    CLOSED: [2022-08-16 Tue 15:32]
   cohesion
+** DONE Make bivariate inter-experiment heatmaps renderable into videos
+   CLOSED: [2022-08-25 Thu 12:56]