From 84ace89bbf4d63258fa73c31199c637c5436ea36 Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:36:15 +0000 Subject: [PATCH] Add docstrings; fix test with fickle assertion --- src/calliope/backend/pyomo_backend_model.py | 2 +- src/calliope/core/model.py | 4 +- src/calliope/preprocess/model_data.py | 205 +++++++++++++++++--- tests/test_backend_pyomo.py | 17 +- 4 files changed, 190 insertions(+), 38 deletions(-) diff --git a/src/calliope/backend/pyomo_backend_model.py b/src/calliope/backend/pyomo_backend_model.py index 19fd55c5..a1465bd8 100644 --- a/src/calliope/backend/pyomo_backend_model.py +++ b/src/calliope/backend/pyomo_backend_model.py @@ -392,7 +392,7 @@ def update_parameter( "parameters", name, "Defining values for a previously fully/partially undefined parameter. " - f"The optimisation problem components {refs_to_update} will be re-built.", + f"The optimisation problem components {sorted(refs_to_update)} will be re-built.", "info", ) self.delete_component(name, "parameters") diff --git a/src/calliope/core/model.py b/src/calliope/core/model.py index 48391d39..163be0b0 100644 --- a/src/calliope/core/model.py +++ b/src/calliope/core/model.py @@ -213,9 +213,7 @@ def _init_from_model_def_dict( attributes, param_metadata, ) - model_data_factory.build(timeseries_dataframes) - - self._model_data = model_data_factory.model_data + self._model_data = model_data_factory.build(timeseries_dataframes) log_time( LOGGER, diff --git a/src/calliope/preprocess/model_data.py b/src/calliope/preprocess/model_data.py index fed70327..4b4bdcad 100644 --- a/src/calliope/preprocess/model_data.py +++ b/src/calliope/preprocess/model_data.py @@ -37,6 +37,7 @@ class ModelDefinition(TypedDict): class ModelDataFactory: + # TODO: move into yaml syntax and have it be updatable LOOKUP_PARAMS = { "carrier_in": "carriers", "carrier_out": "carriers", @@ -64,28 +65,18 @@ def __init__( attributes: dict, param_attributes: dict[str, dict], ): - """ - Take a Calliope model_run and convert it into an xarray Dataset, ready for - constraint generation. Timeseries data is also extracted from file at this - point, and the time dimension added to the data - - Parameters - ---------- - model_run_dict : AttrDict - preprocessed model_run dictionary, as produced by - Calliope.preprocess.preprocess_model - - Returns - ------- - data : xarray Dataset - Dataset with optimisation param_dict as variables, optimisation sets as - coordinates, and other information in attributes. - data_pre_time : xarray Dataset, only returned if debug = True - Dataset, prior to time dimension addition, with optimisation param_dict - as variables, optimisation sets as coordinates, and other information - in attributes. + """Take a Calliope model definition dictionary and convert it into an xarray Dataset, ready for + constraint generation. + + This includes extracting timeseries data from file and resampling/clustering as necessary. + Args: + model_config (dict): Model initialisation configuration (i.e., `config.init`). + model_definition (ModelDefinition): Definition of model nodes and technologies as a dictionary. + attributes (dict): Attributes to attach to the model Dataset. + param_attributes (dict[str, dict]): Attributes to attach to the generated model DataArrays. 
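+
+        A sketch of the expected `model_definition` structure (illustrative only; the `my_*` names are placeholders):
+
+        ```
+        techs: {my_tech: {...}}
+        tech_groups: {my_tech_group: {...}}
+        nodes: {my_node: {...}}
+        node_groups: {my_node_group: {...}}
+        parameters: {my_top_level_param: {...}}
+        ```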
""" + self.config: dict = model_config self.model_definition: ModelDefinition = model_definition.copy() self.model_data = xr.Dataset(attrs=AttrDict(attributes)) @@ -97,7 +88,15 @@ def __init__( flipped_attributes[subkey][key] = subval self.param_attrs = flipped_attributes - def build(self, timeseries_dfs: Optional[dict[str, pd.DataFrame]]): + def build(self, timeseries_dfs: Optional[dict[str, pd.DataFrame]]) -> xr.Dataset: + """Main function used by the calliope Model object to invoke the factory and get it churning out a model dataset. + + Args: + timeseries_dfs (Optional[dict[str, pd.DataFrame]]): If loading data from pre-loaded dataframes, they need to be provided here. + + Returns: + xr.Dataset: Built model dataset, including the timeseries dimension. + """ self.add_node_tech_data() self.add_time_dimension(timeseries_dfs) self.add_top_level_params() @@ -106,8 +105,16 @@ def build(self, timeseries_dfs: Optional[dict[str, pd.DataFrame]]): self.add_link_distances() self.resample_time_dimension() self.assign_input_attr() + return self.model_data def add_node_tech_data(self): + """For each node, extract technology definitions and node-level parameters and convert them to arrays. + + The node definition will first be updated according to any defined inheritance (via `inherit`), + before processing each defined tech (which will also be updated according to its inheritance tree). + + Node and tech definitions will be validated against the model definition schema here. + """ active_node_dict = self._inherit_defs("nodes") links_at_nodes = self._links_to_node_format(active_node_dict) @@ -157,6 +164,12 @@ def add_node_tech_data(self): self.model_data = xr.merge([self.model_data, node_tech_ds, node_ds]) def add_top_level_params(self): + """Process any parameters defined in the top-level `parameters` key. + + Raises: + KeyError: Cannot provide the same name for a top-level parameter as those defined already at the tech/node level. + + """ if "parameters" not in self.model_definition: return None for param_name, param_data in self.model_definition["parameters"].items(): @@ -176,6 +189,15 @@ def add_top_level_params(self): self.model_data = self.model_data.merge(param_ds) def add_time_dimension(self, timeseries_dfs: Optional[dict[str, pd.DataFrame]]): + """Process file/dataframe references in the model data and use it to expand the model to include a time dimension. + + Args: + timeseries_dfs (Optional[dict[str, pd.DataFrame]]): + Reference to pre-loaded pandas.DataFrame objects, if any reference to them in the model definition (`via df=`). + + Raises: + exceptions.ModelError: The model has to have a time dimension, so at least one reference must exist. 
+        """
         self.model_data = time.add_time_dimension(
             self.model_data, self.config, timeseries_dfs
         )
@@ -186,6 +208,7 @@ def add_time_dimension(self, timeseries_dfs: Optional[dict[str, pd.DataFrame]]):
         self.model_data = time.add_inferred_time_params(self.model_data)

     def resample_time_dimension(self):
+        """If resampling/clustering is requested in the initialisation config, apply it here."""
         if self.config["time_resample"] is not None:
             self.model_data = time.resample(
                 self.model_data, self.config["time_resample"]
             )
@@ -196,6 +219,7 @@ def resample_time_dimension(self):
             )

     def clean_data_from_undefined_members(self):
+        """Generate the `definition_matrix` array, then use it to strip out dimension items that are NaN in all arrays and arrays that are NaN at every index position."""
         def_matrix = self.model_data.active.notnull() & (
             self.model_data.carrier_in.notnull() | self.model_data.carrier_out.notnull()
         )
@@ -232,6 +256,11 @@ def clean_data_from_undefined_members(self):
         self.model_data = self.model_data.drop_vars(vars_to_delete)

     def add_link_distances(self):
+        """If latitude/longitude are provided but distances between nodes have not been computed, compute them now.
+
+        The schema will already have ensured that if one of latitude/longitude is provided, the other is too.
+
+        """
         # If no distance was given, we calculate it from coordinates
         if (
             "latitude" in self.model_data.data_vars
             and
@@ -273,6 +302,11 @@
             )

     def add_colors(self):
+        """If technology colours have not been provided, or have only been partially provided, generate a sequence of colours to fill the gap.
+
+        This is a convenience function for downstream plotting.
+        Since we have removed core plotting components from Calliope, it is not a strictly necessary preprocessing step.
+        """
         techs = self.model_data.techs
         color_array = self.model_data.get("color")
         default_palette_cycler = itertools.cycle(range(len(self._DEFAULT_PALETTE)))
@@ -290,10 +324,25 @@ def add_colors(self):
         self.model_data["color"] = self.model_data["color"].fillna(new_color_array)

     def assign_input_attr(self):
+        """Assign the `is_result=False` attribute to all input parameters, so that the input arrays can be filtered in the calliope.Model object."""
         for var_name, var_data in self.model_data.data_vars.items():
             self.model_data[var_name] = var_data.assign_attrs(is_result=False)

     def _get_relevant_node_refs(self, techs_dict: AttrDict, node: str) -> list[str]:
+        """Get all references to parameters made in technologies at nodes.
+
+        This defines those arrays in the dataset that *must* be indexed over `nodes` as well as `techs`.
+
+        If timeseries files/dataframes are referenced in a tech at a node, the node name is added as the column name in-place.
+        Techs *must* define these timeseries references explicitly at nodes to access different data columns at different nodes.
+
+        Args:
+            techs_dict (AttrDict): Dictionary of technologies defined at a node.
+            node (str): Name of the node.
+
+        Returns:
+            list[str]: List of parameters at this node that must be indexed over the node dimension.
+        """
         refs = set()
         for key, val in techs_dict.as_dict_flat().items():
             if (
@@ -311,6 +360,15 @@ def _get_relevant_node_refs(self, techs_dict: AttrDict, node: str) -> list[str]:
         return list(refs)

     def _param_dict_to_array(self, param_name: str, param_data: Param) -> xr.DataArray:
+        """Take a blessed parameter dictionary and convert it to an xarray DataArray.
+
+        Args:
+            param_name (str): Name of the parameter being converted.
+            param_data (Param): Blessed dictionary.
I.e., keys/values follow an expected structure. + + Returns: + xr.DataArray: Array representation of the parameter. + """ if param_data["dims"]: param_series = pd.Series( data=param_data["data"], @@ -330,8 +388,22 @@ def _param_dict_to_array(self, param_name: str, param_data: Param) -> xr.DataArr def _definition_dict_to_ds( self, def_dict: dict[str, dict[str, dict | list[str] | DATA_T]], - dim_name: str, + dim_name: Literal["nodes", "techs"], ) -> xr.Dataset: + """Convert a dictionary of nodes/techs with their parameter definitions into an xarray dataset. + + Node/tech name will be injected into each parameter's `index` and `dims` lists so that the resulting arrays include those dimensions. + + Args: + def_dict (dict[str, dict[str, dict | list[str] | DATA_T]]): + `node`/`tech` definitions. + The first set of keys are dimension index items, the second set of keys are parameter names. + Parameters need not be blessed. + dim_name (Literal[nodes, techs]): Dimension name of the dictionary items. + + Returns: + xr.Dataset: Dataset with arrays indexed over (at least) the input `dim_name`. + """ param_ds = xr.Dataset() for idx_name, idx_params in def_dict.items(): param_das: list[xr.DataArray] = [] @@ -347,6 +419,24 @@ def _definition_dict_to_ds( def _prepare_param_dict( self, param_name: str, param_data: dict | list[str] | DATA_T ) -> Param: + """Convert a range of parameter definitions into the blessed `Param` format, i.e.: + + ``` + data: numeric/boolean/string data or list of them. + index: list of lists containing dimension index items (number of items in the sub-lists == length of `dims`). + dims: list of dimension names. + ``` + + Args: + param_name (str): Parameter name (used only in error messages). + param_data (dict | list[str] | DATA_T): Input unformatted parameter data. + + Raises: + ValueError: If the parameter is unindexed (i.e., no `dims`/`index`) and is not a lookup array (see LOOKUP_PARAMS), it cannot define a list of data. + + Returns: + Param: Blessed parameter dictionary. + """ if isinstance(param_data, dict): data = param_data["data"] index_items = [listify(idx) for idx in listify(param_data["index"])] @@ -376,6 +466,29 @@ def _inherit_defs( dim_dict: Optional[AttrDict] = None, err_message_prefix: str = "", ) -> AttrDict: + """For a set of node/tech definitions, climb the inheritance tree to build a final definition dictionary. + + For `techs` at `nodes`, the first step is to inherit the technology definition from `techs`, _then_ to climb `inherit` references. + + Base definitions will take precedence over inherited ones and more recent inherited definitions will take precedence over older ones. + + If a `tech`/`node` has the `active` parameter set to `False` (including if it inherits this parameter), it will not make it into the output dictionary. + + Args: + dim_name (Literal[nodes, techs]): Name of dimension we're working with. + dim_dict (Optional[AttrDict], optional): + Base dictionary to work from. + If not defined, `dim_name` will be used to access the dictionary from the base model definition. + Defaults to None. + err_message_prefix (str, optional): + If working with techs at nodes, it is prudent to provide the node name to prefix error messages. Defaults to "". + + Raises: + KeyError: Cannot define a `tech` at a `node` if it isn't already defined under the `techs` top-level key. + + Returns: + AttrDict: Dictionary containing all active tech/node definitions with inherited parameters. 
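+
+        A sketch of the inheritance this method resolves (illustrative only; all names are placeholders):
+
+        ```
+        tech_groups:
+          my_group:
+            my_param: 10
+        techs:
+          my_tech:
+            inherit: my_group  # `my_tech` ends up with `my_param: 10`
+        ```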
+        """
         updated_defs = AttrDict()
         if dim_dict is None:
             dim_dict = self.model_definition[dim_name]
@@ -414,10 +527,34 @@ def _inherit_defs(
     def _climb_inheritance_tree(
         self,
         dim_item_dict: AttrDict,
-        dim_name: str,
+        dim_name: Literal["nodes", "techs"],
         item_name: str,
         inheritance: Optional[list] = None,
     ) -> tuple[AttrDict, Optional[list]]:
+        """Follow the `inherit` references from `nodes` to `node_groups` / from `techs` to `tech_groups`.
+
+        Abstract group definitions (those in `node_groups`/`tech_groups`) can inherit each other, but `nodes`/`techs` cannot.
+
+        This function will be called recursively until a definition dictionary without `inherit` is reached.
+
+        Args:
+            dim_item_dict (AttrDict):
+                Dictionary (possibly) containing `inherit`. If it doesn't contain `inherit`, the climbing stops here.
+            dim_name (Literal[nodes, techs]):
+                The name of the dimension we're working with, so that we can access the correct `_groups` definitions.
+            item_name (str):
+                The current position in the inheritance tree.
+            inheritance (Optional[list], optional):
+                A list of items that have been inherited (starting with the oldest).
+                If the first `dim_item_dict` does not contain `inherit`, this will remain as None.
+                Defaults to None.
+
+        Raises:
+            KeyError: Must inherit from a named group item in `node_groups` (for `nodes`) or `tech_groups` (for `techs`).
+
+        Returns:
+            tuple[AttrDict, Optional[list]]: Definition dictionary with inherited data and a list of the inheritance tree climbed to get there.
+        """
         dim_name_singular = dim_name.removesuffix("s")
         dim_group_def = self.model_definition.get(f"{dim_name_singular}_groups", None)
         to_inherit = dim_item_dict.get("inherit", None)
@@ -440,6 +577,16 @@ def _climb_inheritance_tree(
         return updated_dim_item_dict, inheritance

     def _links_to_node_format(self, active_node_dict: AttrDict) -> AttrDict:
+        """Process `transmission` techs into links by assigning them to the nodes defined by their `from` and `to` keys.
+
+        Args:
+            active_node_dict (AttrDict):
+                Dictionary of nodes that are active in this model.
+                If a transmission tech references a non-active or undefined node, a link will not be generated.
+
+        Returns:
+            AttrDict: Dictionary of transmission techs distributed to nodes (of the form {node_name: {tech_name: {...}, tech_name: {}}}).
+        """
         active_link_techs = AttrDict(
             {
                 tech: tech_def
@@ -473,7 +620,7 @@ def _links_to_node_format(self, active_node_dict: AttrDict) -> AttrDict:

         return link_tech_dict

-    def _update_param_coords(self, param_name: str, param_da: xr.DataArray) -> None:
+    def _update_param_coords(self, param_name: str, param_da: xr.DataArray):
         """
         Check array coordinates to see if any should be in datetime format,
         if the base model coordinate is in datetime format.
@@ -499,7 +646,7 @@ def _update_param_coords(self, param_name: str, param_da: xr.DataArray) -> None:
                 f"(parameters, {param_name}) | Updating {coord_name} dimension index values to datetime format"
             )

-    def _log_param_updates(self, param_name: str, param_da: xr.DataArray) -> None:
+    def _log_param_updates(self, param_name: str, param_da: xr.DataArray):
         """
         Check array coordinates to see if:
         1. any are new compared to the base model dimensions.
@@ -526,6 +673,14 @@ def _log_param_updates(self, param_name: str, param_da: xr.DataArray) -> None:

     @staticmethod
     def _add_active_node_tech(ds: xr.Dataset) -> xr.Dataset:
+        """For each node, create a boolean array where each `tech` that is defined at that `node` is set to True.
+
+        Args:
+            ds (xr.Dataset): Dataset of technology parameters at a given node.
+
+        Returns:
+            xr.Dataset: Input with the addition of a boolean `active` array.
+        """
         if not ds.nodes.shape:
             ds["nodes"] = ds["nodes"].expand_dims("nodes")
         if not ("techs" in ds.coords and "nodes" in ds.coords):
diff --git a/tests/test_backend_pyomo.py b/tests/test_backend_pyomo.py
index 140702b8..878ccb4f 100755
--- a/tests/test_backend_pyomo.py
+++ b/tests/test_backend_pyomo.py
@@ -2342,16 +2342,17 @@ def test_update_parameter_replace_defaults(self, simple_supply):
         assert expected.equals(updated_param)

     def test_update_parameter_add_dim(self, caplog, simple_supply):
-        """flow_out_eff doesn't have the time dimension in the simple model, we add it here."""
+        """
+        flow_out_eff doesn't have the time dimension in the simple model, so we add it here.
+        """
         updated_param = simple_supply.inputs.flow_out_eff.where(
             simple_supply.inputs.timesteps.notnull()
         )
-
-        refs_to_update = {
-            "balance_transmission",
+        refs_to_update = [  # should be sorted alphabetically
             "balance_supply_no_storage",
+            "balance_transmission",
             "flow_out_inc_eff",
-        }
+        ]
         caplog.set_level(logging.DEBUG)

         simple_supply.backend.update_parameter("flow_out_eff", updated_param)
@@ -2368,12 +2369,10 @@
         assert "timesteps" in expected.dims

     def test_update_parameter_replace_undefined(self, caplog, simple_supply):
-        """source_eff isn't defined in the inputs, so is a dimensionless value in the pyomo object, assigned its default value.
-        NOTE: For the test, there should be at most 2 items in `refs to update`, otherwise their order in the logging message is unknown and can lead the test to fail erroneously.
-        """
+        """source_eff isn't defined in the inputs, so it is a dimensionless value in the pyomo object, assigned its default value."""

         updated_param = simple_supply.inputs.flow_out_eff
-        refs_to_update = {"balance_supply_no_storage"}
+        refs_to_update = ["balance_supply_no_storage"]
         caplog.set_level(logging.DEBUG)

         simple_supply.backend.update_parameter("source_eff", updated_param)