From fd58fe5ccea8c2c7163c8e7fe8574c635a40414c Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Fri, 6 Sep 2024 13:43:44 -0400 Subject: [PATCH] change `time_variable` arg to `sorting_variable`, allow None, and move TEMPO default into Harmony adapter --- concatenator/harmony/service_adapter.py | 1 + concatenator/run_stitchee.py | 5 +++++ concatenator/stitchee.py | 19 ++++++++++++++----- .../integration/test_history_construction.py | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/concatenator/harmony/service_adapter.py b/concatenator/harmony/service_adapter.py index d4034ba..532cfe5 100644 --- a/concatenator/harmony/service_adapter.py +++ b/concatenator/harmony/service_adapter.py @@ -123,6 +123,7 @@ def process_catalog(self, catalog: pystac.Catalog) -> pystac.Catalog: write_tmp_flat_concatenated=False, keep_tmp_files=False, concat_dim="mirror_step", # This is currently set only for TEMPO + sorting_variable="geolocation/time", # This is currently set only for TEMPO history_to_append=new_history_json, logger=self.logger, ) diff --git a/concatenator/run_stitchee.py b/concatenator/run_stitchee.py index f2cdf42..d6e5b19 100644 --- a/concatenator/run_stitchee.py +++ b/concatenator/run_stitchee.py @@ -69,6 +69,11 @@ def parse_args(args: list) -> argparse.Namespace: help="Dimension to concatenate along, if possible. " "This is required if using the 'xarray-concat' method", ) + parser.add_argument( + "--sorting_variable", + help="Name of a variable to use for sorting datasets before concatenation by xarray. " + "E.g., 'time'.", + ) parser.add_argument( "--xarray_arg_compat", help="'compat' argument passed to xarray.concat() or xarray.combine_by_coords().", diff --git a/concatenator/stitchee.py b/concatenator/stitchee.py index d20c822..4cd2263 100644 --- a/concatenator/stitchee.py +++ b/concatenator/stitchee.py @@ -40,7 +40,7 @@ def stitchee( concat_method: str | None = "xarray-concat", concat_dim: str = "", concat_kwargs: dict | None = None, - time_variable: str = "geolocation/time", + sorting_variable: str | None = None, history_to_append: str | None = None, copy_input_files: bool = False, overwrite_output_file: bool = False, @@ -62,11 +62,15 @@ def stitchee( concat_method either "xarray-concat" (default) or "xarray-combine". concat_dim - dimension along which to concatenate (default: ""). Not needed is concat_method is "xarray-combine". + dimension along which to concatenate (default: ""). + Not needed if concat_method is "xarray-combine". concat_kwargs keyword arguments to pass to xarray.concat or xarray.combine_by_coords (default: None). + sorting_variable + name of a variable to use for sorting datasets before concatenation by xarray. + E.g., `time`. history_to_append - json string to append to the history attribute of the concatenated file (default: None). + JSON string to append to the history attribute of the concatenated file (default: None). copy_input_files whether to copy input files or not (default: False). overwrite_output_file @@ -140,7 +144,12 @@ def stitchee( ) # Determine value for later dataset sorting. - first_value = xrds[flatten_string_with_groups(time_variable)].values.flatten()[0] + if sorting_variable: + first_value = xrds[ + flatten_string_with_groups(sorting_variable) + ].values.flatten()[0] + else: + first_value = i # first_value = xrds[concatenator.group_delim + concat_dim].values.flatten()[0] concat_dim_order.append(first_value) @@ -194,7 +203,7 @@ def stitchee( if write_tmp_flat_concatenated: logger.info("Writing concatenated flattened temporary file to disk...") - # Concatenated, yet still flat, file is written to disk for debugging. + # The concatenated, yet still flat, file is written to disk for debugging. tmp_flat_concatenated_path = add_label_to_path( output_file, label="_flat_intermediate" ) diff --git a/tests/integration/test_history_construction.py b/tests/integration/test_history_construction.py index 034e6de..04e5f88 100644 --- a/tests/integration/test_history_construction.py +++ b/tests/integration/test_history_construction.py @@ -28,7 +28,7 @@ def test_construct_and_append_history_for_sample_concatenation( concat_method="xarray-concat", history_to_append=new_history_json, concat_dim="step", - time_variable="step", + sorting_variable="step", ) stitcheed_dataset = xr.open_dataset(output_path)