Skip to content

Commit

Permalink
Add random seed specification option
Browse files (browse the repository at this point in the history)
  • Loading branch information
OOAmusat committed Dec 18, 2023
1 parent 9fcb8c3 commit 3af1da5
Show file tree
Hide file tree
Showing 2 changed files with 251 additions and 2 deletions.
27 changes: 25 additions & 2 deletions idaes/core/surrogate/pysmo/sampling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -480,6 +480,7 @@ def __init__(
sampling_type=None,
xlabels=None,
ylabels=None,
rand_seed=None,
):
"""
Initialization of **LatinHypercubeSampling** class. Two inputs are required.
Expand All @@ -496,6 +497,7 @@ def __init__(
Keyword Args:
xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None.
ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required).
Returns:
**self** function containing the input information
Expand Down Expand Up @@ -594,6 +596,12 @@ def __init__(
self.number_of_samples = number_of_samples
self.x_data = bounds_array # Only x data will be present in this case

if rand_seed is not None:
if not isinstance(rand_seed, int):
raise TypeError("Random seed must be an integer.")
self.seed_value = rand_seed
np.random.seed(self.seed_value)

def variable_sample_creation(self, variable_min, variable_max):
"""
Expand Down Expand Up @@ -1269,6 +1277,7 @@ def __init__(
sampling_type=None,
xlabels=None,
ylabels=None,
rand_seed=None,
):
"""
Initialization of CVTSampling class. Two inputs are required, while an optional option to control the solution accuracy may be specified.
Expand All @@ -1285,6 +1294,7 @@ def __init__(
Keyword Args:
xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None.
ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required).
tolerance(float): Maximum allowable Euclidean distance between centres from consecutive iterations of the algorithm. Termination condition for algorithm.
- The smaller the value of tolerance, the better the solution but the longer the algorithm requires to converge. Default value is :math:`10^{-7}`.
Expand Down Expand Up @@ -1412,6 +1422,12 @@ def __init__(
raise Exception("Invalid tolerance input")
self.eps = tolerance

if rand_seed is not None:
if not isinstance(rand_seed, int):
raise TypeError("Random seed must be an integer.")
self.seed_value = rand_seed
np.random.seed(self.seed_value)

@staticmethod
def random_sample_selection(no_samples, no_features):
"""
Expand Down Expand Up @@ -1591,6 +1607,7 @@ def __init__(
xlabels=None,
ylabels=None,
strictly_enforce_gaussian_bounds=False,
rand_seed=None,
):
"""
Initialization of CustomSampling class. Four inputs are required.
Expand All @@ -1608,6 +1625,7 @@ def __init__(
Keyword Args:
xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None.
ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required).
strictly_enforce_gaussian_bounds (bool): Boolean specifying whether the provided bounds for normal distributions should be strictly enforced. Note that selecting this option may affect the underlying distribution. Default is False.
Returns:
Expand Down Expand Up @@ -1732,13 +1750,18 @@ def __init__(
)
self.normal_bounds_enforced = strictly_enforce_gaussian_bounds

if rand_seed is not None:
if not isinstance(rand_seed, int):
raise TypeError("Random seed must be an integer.")
self.seed_value = rand_seed

def generate_from_dist(self, dist_name):
if dist_name.lower() in ["uniform", "random"]:
dist = getattr(np.random.default_rng(), dist_name.lower())
dist = getattr(np.random.default_rng(self.seed_value), dist_name.lower())
var_values = np.array(dist(size=self.number_of_samples))
return dist, var_values
elif dist_name.lower() == "normal":
dist = getattr(np.random.default_rng(), "normal")
dist = getattr(np.random.default_rng(self.seed_value), "normal")
var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples)
if not self.normal_bounds_enforced:
return dist, np.array(var_values)
Expand Down
Loading

0 comments on commit 3af1da5

Please sign in to comment.