From 95fc695a89ae209139787e1dfa7cdd7e9de426a6 Mon Sep 17 00:00:00 2001
From: Jordan Stomps
Date: Wed, 18 Jan 2023 09:33:35 -0500
Subject: [PATCH] adjusting numpy.random.seed usage in cotraining

---
 models/SSML/CoTraining.py | 15 +++++++--------
 tests/test_models.py      | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/models/SSML/CoTraining.py b/models/SSML/CoTraining.py
index 16eac7d..9fda2db 100644
--- a/models/SSML/CoTraining.py
+++ b/models/SSML/CoTraining.py
@@ -16,20 +16,22 @@ class CoTraining:
     regression implementation with hyperparameter optimization.
     Data agnostic (i.e. user supplied data inputs).
     TODO: Currently only supports binary classification.
-        Add multinomial functions and unit tests.
-        Add functionality for regression(?)
+    - Add multinomial functions and unit tests.
+    - Add functionality for regression(?)
     Inputs:
     kwargs: logistic regression input functions.
-        keys random_state, max_iter, tol, and C supported.
-    random_state: int/float for reproducible intiailization.
+        keys seed, random_state, max_iter, tol, and C supported.
+    seed/random_state: int/float for reproducible intiailization.
     '''
 
     # only binary so far
     def __init__(self, **kwargs):
-        # supported keys = ['max_iter', 'tol', 'C', 'random_state']
+        # supported keys = ['max_iter', 'tol', 'C', 'random_state', 'seed']
         # defaults to a fixed value for reproducibility
         self.random_state = kwargs.pop('random_state', 0)
+        # set the random seed of training splits for reproducibility
         self.seed = kwargs.pop('seed', 0)
+        np.random.seed(self.seed)
         # parameters for cotraining logistic regression models:
         # defaults to sklearn.linear_model.LogisticRegression default vals
         self.max_iter = kwargs.pop('max_iter', 100)
@@ -236,9 +238,6 @@ def train(self, trainx, trainy, Ux,
         # avoid overwriting when deleting in co-training loop
         U_lr = Ux.copy()
 
-        # set the random seed of training splits for reproducibility
-        np.random.seed(self.seed)
-
         # TODO: allow a user to specify uneven splits between the two models
         split_frac = 0.5
         # labeled training data
diff --git a/tests/test_models.py b/tests/test_models.py
index b7bb087..334fc19 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -122,14 +122,16 @@ def test_pca():
 
 def test_LogReg():
     # test saving model input parameters
-    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0, 'random_state': 0}
     model = LogReg(max_iter=params['max_iter'],
                    tol=params['tol'],
-                   C=params['C'])
+                   C=params['C'],
+                   random_state=params['random_state'])
 
     assert model.model.max_iter == params['max_iter']
     assert model.model.tol == params['tol']
     assert model.model.C == params['C']
+    assert model.random_state == params['random_state']
 
     X_train, X_test, y_train, y_test = train_test_split(pytest.spectra,
                                                         pytest.labels,
@@ -187,10 +189,13 @@ def test_LogReg():
 
 def test_CoTraining():
     # test saving model input parameters
-    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0}
+    params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0,
+              'random_state': 0, 'seed': 1}
     model = CoTraining(max_iter=params['max_iter'],
                        tol=params['tol'],
-                       C=params['C'])
+                       C=params['C'],
+                       random_state=params['random_state'],
+                       seed=params['seed'])
 
     assert model.model1.max_iter == params['max_iter']
     assert model.model1.tol == params['tol']
@@ -200,6 +205,9 @@ def test_CoTraining():
     assert model.model2.tol == params['tol']
     assert model.model2.C == params['C']
 
+    assert model.random_state == params['random_state']
+    assert model.seed == params['seed']
+
     X, Ux, y, Uy = train_test_split(pytest.spectra,
                                     pytest.labels,
                                     test_size=0.5,
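
The change above relies on how NumPy's legacy global RNG works: np.random.seed() sets process-wide state, so seeding once when a CoTraining object is constructed makes every later draw from np.random (such as the labeled-data split in train()) repeatable, rather than re-seeding at the top of each train() call. The sketch below is not part of the patch; TinyCoTrainer is a hypothetical stand-in that reduces CoTraining to just this seeding pattern.

import numpy as np


class TinyCoTrainer:
    """Hypothetical stand-in for CoTraining, reduced to the seeding pattern."""

    def __init__(self, **kwargs):
        # mirror the patched __init__: pop the seed kwarg and seed the
        # global NumPy RNG once, at construction time
        self.seed = kwargs.pop('seed', 0)
        np.random.seed(self.seed)

    def split(self, x):
        # stand-in for the 50/50 labeled-data split performed in train()
        idx = np.random.permutation(len(x))
        half = len(x) // 2
        return x[idx[:half]], x[idx[half:]]


x = np.arange(10)
a1, b1 = TinyCoTrainer(seed=1).split(x)
a2, b2 = TinyCoTrainer(seed=1).split(x)
# constructing with the same seed reproduces the same split
assert (a1 == a2).all() and (b1 == b2).all()

Because np.random.seed() is global, note that two objects constructed in the same process share (and reset) the same RNG state, with the most recent constructor call taking effect.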