py-econometrics · amichuda · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024 · Jun 24, 2024
diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml
@@ -21,16 +21,13 @@ jobs:
       - name: Checkout source
         uses: actions/checkout@v2
 
-      - name: Setup R
-        uses: r-lib/actions/setup-r@v2
-        with:
-          r-version: '4.2.0'
+      - name: Setup r2u
+        uses: eddelbuettel/github-actions/r2u-setup@master
 
-      - name: install fwildclusterboot for testing
-        run: Rscript -e 'install.packages("fwildclusterboot", repos="https://cloud.r-project.org")'
+      - name: install R packages
+        run: Rscript -e 'install.packages(c("fwildclusterboot"))'
         shell: bash
 
-
       - name: Setup python
         uses: actions/setup-python@v2
         with:

diff --git a/pyproject.toml b/pyproject.toml
@@ -48,6 +48,7 @@ pymdown-extensions = ">=10.0"
 mkdocstrings-python-legacy = "^0.2.3"
 mkdocstrings = {version = "^0.19.0", extras = ["python"], optional = true }
 pymdown-extensions = ">=10.0"
+rpy2 = "^3.5.16"
 
 
 [build-system]

diff --git a/tests/test_seeds.py b/tests/test_seeds.py
@@ -44,3 +44,16 @@ def test_results_from_same_seed(data):
         np.random.seed(123)
         b2 = wildboottest(model, param = "X1", cluster = x, B= 999)
         pd.testing.assert_frame_equal(a2,b2)
+
+def test_seeds_and_rng(data):
+    """An integer seed and a Generator built from that seed must agree."""
+    seed_value = 876587
+    model = sm.ols(formula='Y ~ X1 + X2', data=data)
+
+    for cluster in (data.cluster, None):
+        # Same check for the clustered and the non-clustered bootstrap.
+        from_int = wildboottest(model, param="X1", cluster=cluster, B=999, seed=seed_value)
+        from_rng = wildboottest(
+            model, param="X1", cluster=cluster, B=999, seed=np.random.default_rng(seed=seed_value)
+        )
+        pd.testing.assert_frame_equal(from_int, from_rng)
diff --git a/tests/test_weights.py b/tests/test_weights.py
@@ -4,7 +4,6 @@
 import numpy as np
 import pandas as pd
 
-np.random.seed(89756)
 
 ts = list(wild_draw_fun_dict.keys())
 full_enum = [True, False]
@@ -13,6 +12,7 @@
 
 @pytest.fixture
 def data():
+    np.random.seed(12315)
     N = 100
     k = 2
     G= 20
@@ -46,9 +46,11 @@ def test_different_weights(data):
     X, y, cluster, bootcluster, R, B = data
 
     results_dict = {}
+
+    rng = np.random.default_rng(seed=0)
 
     for w in ts:
-        boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341)
+        boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = rng)
         boot.get_scores(bootstrap_type = "11", impose_null = True)
         boot.get_weights(weights_type = w)
         boot.get_numer()
@@ -60,7 +62,9 @@ def test_different_weights(data):
         results_dict[w] = boot.pvalue
 
     results_series = pd.Series(results_dict)
+    print(results_series)
 
     mapd = (results_series - results_series.mean()).abs().mean()  / results_series.mean()    
+    print(mapd)
 
     assert  mapd <= .1# make sure mean absolute percentage deviation is less than 10% (ad hoc)
diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py
@@ -5,6 +5,13 @@
 from wildboottest.weights import draw_weights
 import warnings
 from typing import Union, Tuple, Callable
+from numpy.random import Generator
+from statsmodels.regression.linear_model import OLS
+
+# Model classes that wildboottest() accepts; used as its isinstance() target.
+_allowed_models = (
+  OLS,
+)
 
 class WildDrawFunctionException(Exception):
     pass
@@ -55,7 +62,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
           R : Union[np.ndarray, pd.DataFrame],
           r: Union[np.ndarray, float],
           B: int,
-          seed:  Union[int, None] = None) -> None:
+          seed:  Union[int, Generator, None] = None) -> None:
 
         """Initializes the Heteroskedastic Wild Bootstrap Class
 
@@ -64,7 +71,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
           Y (Union[np.ndarray, pd.DataFrame, pd.Series]): Endogenous variable array or dataframe
           R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap
           B (int): bootstrap iterations
-          seed (Union[int, None], optional): Random seed for random weight types. Defaults to None.
+          seed (Union[int, Generator, None], optional): Random seed for random weight types.
+          An integer is used to seed a new numpy default random generator; an existing
+          numpy Generator can also be passed and is used directly. Defaults to None.
 
         Raises:
           TypeError: Raise if input arrays are lists
@@ -85,10 +94,12 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
         else:
           self.Y = Y
 
-        if seed is None:
-          seed = np.random.randint(low = 1, high =  (2**32 - 1), size = 1, dtype=np.int64)
-
-        self.rng = np.random.default_rng(seed = seed)
+        # Delegate seed handling to np.random.default_rng: None draws fresh
+        # entropy, an integer seeds a new Generator, and an existing Generator
+        # is returned unaltered. Calling it directly (rather than dispatching
+        # on isinstance(seed, int)) keeps numpy integer seeds such as np.int64
+        # from being silently dropped in favour of an unseeded generator.
+        self.rng = np.random.default_rng(seed)
 
         self.N = X.shape[0]
         self.k = X.shape[1]
@@ -274,7 +285,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
                R : Union[np.ndarray, pd.DataFrame],
                B: int,
                bootcluster: Union[np.ndarray, pd.DataFrame, pd.Series, None] = None,
-               seed:  Union[int, None] = None,
+               seed:  Union[int, Generator, None] = None,
                parallel: bool = True) -> None:
     """Initializes the Wild Cluster Bootstrap Class
 
@@ -285,7 +296,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
         R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap
         B (int): bootstrap iterations
         bootcluster (Union[np.ndarray, pd.DataFrame, pd.Series, None], optional): Sub-cluster array. Defaults to None.
-        seed (Union[int, None], optional): Random seed for random weight types. Defaults to None.
+        seed (Union[int, Generator, None], optional): Random seed for random weight types.
+          An integer is used to seed a new numpy default random generator; an existing
+          numpy Generator can also be passed and is used directly. Defaults to None.
         parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True.
     Raises:
         TypeError: Raise if input arrays are lists
@@ -326,11 +339,13 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series],
       self.bootclustid = np.unique(bootcluster)
       self.bootcluster = bootcluster
 
-    if seed is None:
-      seed = np.random.randint(low = 1, high =  (2**32 - 1), size = 1, dtype=np.int64)
-
-    self.rng = np.random.default_rng(seed = seed)
-
+    # Delegate seed handling to np.random.default_rng: None draws fresh
+    # entropy, an integer seeds a new Generator, and an existing Generator
+    # is returned unaltered. Calling it directly (rather than dispatching
+    # on isinstance(seed, int)) keeps numpy integer seeds such as np.int64
+    # from being silently dropped in favour of an unseeded generator.
+    self.rng = np.random.default_rng(seed)
+
     self.N_G_bootcluster = len(self.bootclustid)
     self.G  = len(self.clustid)
 
@@ -640,14 +655,14 @@ def get_pvalue(self, pval_type = "two-tailed"):
       self.pvalue = np.mean(self.t_stat > self.t_boot)
 
 
-def wildboottest(model : 'OLS',
+def wildboottest(model : OLS,
                  B:int,
                  cluster : Union[np.ndarray, pd.Series, pd.DataFrame, None] = None,
                  param : Union[str, None] = None,
                  weights_type: str = 'rademacher',
                  impose_null: bool = True,
                  bootstrap_type: str = '11',
-                 seed: Union[str, None] = None,
+                 seed: Union[int, Generator, None] = None,
                  adj: bool = True,
                  cluster_adj: bool = True,
                  parallel: bool = True,
@@ -666,7 +681,9 @@ def wildboottest(model : 'OLS',
                            Defaults to True.
       bootstrap_type (str, optional):A string of length one. Allows to choose the bootstrap type
                           to be run. Either '11', '31', '13' or '33'. '11' by default. Defaults to '11'.
-      seed (Union[str, None], optional): Option to provide a random seed. Defaults to None.
+      seed (Union[int, Generator, None], optional): Random seed for random weight types.
+        An integer is used to seed a new numpy default random generator; an existing
+        numpy Generator can also be passed and is used directly. Defaults to None.
       adj (bool, optional): Whether to adjust for small sample. Defaults to True.
       cluster_adj (bool, optional): Whether to do a cluster-robust small sample correction. Defaults to True.
       parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True.
@@ -702,6 +719,9 @@ def wildboottest(model : 'OLS',
       >>> wildboottest(model, param = "X1", cluster = cluster, B = 9999)
       >>> wildboottest(model, cluster = cluster, B = 9999)
   """
+  # Only the model classes listed in _allowed_models are supported.
+  if not isinstance(model, _allowed_models):
+    raise NotImplementedError(f"Only allow models of type {', '.join([str(i) for i in _allowed_models])}")
 
   # does model.exog already exclude missing values?
   X = model.exog