Add random seed specification option

IDAES · Dec 18, 2023 · 3af1da5 · 3af1da5
1 parent 9fcb8c3
commit 3af1da5
Show file tree

Hide file tree

Showing 2 changed files with 251 additions and 2 deletions.
diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py
@@ -480,6 +480,7 @@ def __init__(
         sampling_type=None,
         xlabels=None,
         ylabels=None,
+        rand_seed=None,
     ):
         """
         Initialization of **LatinHypercubeSampling** class. Two inputs are required.
@@ -496,6 +497,7 @@ def __init__(
         Keyword Args:
             xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input  variables.  Only used in "selection" mode. Default is None.
             ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
+            rand_seed (int): Seed for numpy's global random number generator (``np.random.seed``). Set this to an integer to make the sampling reproducible. Default is None, in which case the generator is not seeded.
 
         Returns:
             **self** function containing the input information
@@ -594,6 +596,12 @@ def __init__(
             self.number_of_samples = number_of_samples
             self.x_data = bounds_array  # Only x data will be present in this case
 
+        if rand_seed is not None:
+            if not isinstance(rand_seed, int):
+                raise TypeError("Random seed must be an integer.")
+            self.seed_value = rand_seed
+            np.random.seed(self.seed_value)
+
     def variable_sample_creation(self, variable_min, variable_max):
         """
 
@@ -1269,6 +1277,7 @@ def __init__(
         sampling_type=None,
         xlabels=None,
         ylabels=None,
+        rand_seed=None,
     ):
         """
         Initialization of CVTSampling class. Two inputs are required, while an optional option to control the solution accuracy may be specified.
@@ -1285,6 +1294,7 @@ def __init__(
         Keyword Args:
             xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input  variables.  Only used in "selection" mode. Default is None.
             ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
+            rand_seed (int): Seed for numpy's global random number generator (``np.random.seed``). Set this to an integer to make the sampling reproducible. Default is None, in which case the generator is not seeded.
             tolerance(float): Maximum allowable Euclidean distance between centres from consecutive iterations of the algorithm. Termination condition for algorithm.
 
                 - The smaller the value of tolerance, the better the solution but the longer the algorithm requires to converge. Default value is :math:`10^{-7}`.
@@ -1412,6 +1422,12 @@ def __init__(
             raise Exception("Invalid tolerance input")
         self.eps = tolerance
 
+        if rand_seed is not None:
+            if not isinstance(rand_seed, int):
+                raise TypeError("Random seed must be an integer.")
+            self.seed_value = rand_seed
+            np.random.seed(self.seed_value)
+
     @staticmethod
     def random_sample_selection(no_samples, no_features):
         """
@@ -1591,6 +1607,7 @@ def __init__(
         xlabels=None,
         ylabels=None,
         strictly_enforce_gaussian_bounds=False,
+        rand_seed=None,
     ):
         """
         Initialization of CustomSampling class. Four inputs are required.
@@ -1608,6 +1625,7 @@ def __init__(
         Keyword Args:
             xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input  variables.  Only used in "selection" mode. Default is None.
             ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
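+            rand_seed (int): Seed passed to numpy's random number generator (``np.random.default_rng``) when drawing from the specified distributions. Set this to an integer to make the samples reproducible. Default is None (unseeded).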
             strictly_enforce_gaussian_bounds (bool): Boolean specifying whether the provided bounds for normal distributions should be strictly enforced. Note that selecting this option may affect the underlying distribution. Default is False.
 
         Returns:
@@ -1732,13 +1750,18 @@ def __init__(
             )
         self.normal_bounds_enforced = strictly_enforce_gaussian_bounds
 
+        if rand_seed is not None:
+            if not isinstance(rand_seed, int):
+                raise TypeError("Random seed must be an integer.")
+        self.seed_value = rand_seed
+
     def generate_from_dist(self, dist_name):
         if dist_name.lower() in ["uniform", "random"]:
-            dist = getattr(np.random.default_rng(), dist_name.lower())
+            dist = getattr(np.random.default_rng(self.seed_value), dist_name.lower())
             var_values = np.array(dist(size=self.number_of_samples))
             return dist, var_values
         elif dist_name.lower() == "normal":
-            dist = getattr(np.random.default_rng(), "normal")
+            dist = getattr(np.random.default_rng(self.seed_value), "normal")
             var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples)
             if not self.normal_bounds_enforced:
                 return dist, np.array(var_values)