diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6f25bfe..8486715 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -3,9 +3,5 @@
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
- "python.formatting.blackPath": "${workspaceFolder}/.venv/bin/black",
"python.terminal.activateEnvironment": true,
- "python.linting.flake8Enabled": true,
- "python.linting.pycodestyleEnabled": false,
- "python.linting.enabled": true
}
\ No newline at end of file
diff --git a/docs/api/data.md b/docs/api/data.md
new file mode 100644
index 0000000..631e5cb
--- /dev/null
+++ b/docs/api/data.md
@@ -0,0 +1,5 @@
+# Data classes and containers
+
+## `utilz.data`
+
+::: utilz.data
diff --git a/docs/api/generators.md b/docs/api/generators.md
new file mode 100644
index 0000000..8aecac7
--- /dev/null
+++ b/docs/api/generators.md
@@ -0,0 +1,5 @@
+# Generator tools
+
+## `utilz.genz`
+
+::: utilz.genz
diff --git a/docs/intro.ipynb b/docs/intro.ipynb
index 3e81f44..776016c 100644
--- a/docs/intro.ipynb
+++ b/docs/intro.ipynb
@@ -26,7 +26,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -1420,7 +1420,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -1454,103 +1454,103 @@
"
\n",
" \n",
" 0 | \n",
- " 0.045894 | \n",
- " 0.093716 | \n",
- " 0.932221 | \n",
+ " 0.897455 | \n",
+ " 0.329248 | \n",
+ " 0.190562 | \n",
" A | \n",
- " A1 | \n",
+ " 1.310156 | \n",
"
\n",
" \n",
" 1 | \n",
- " 0.738293 | \n",
- " 0.249943 | \n",
- " 0.518687 | \n",
+ " 0.411200 | \n",
+ " 0.151263 | \n",
+ " 0.204226 | \n",
" A | \n",
- " A1 | \n",
+ " -1.391970 | \n",
"
\n",
" \n",
" 2 | \n",
- " 0.357182 | \n",
- " 0.454217 | \n",
- " 0.575472 | \n",
+ " 0.670361 | \n",
+ " 0.213199 | \n",
+ " 0.398662 | \n",
" A | \n",
- " A1 | \n",
+ " 0.048193 | \n",
"
\n",
" \n",
" 3 | \n",
- " 0.289010 | \n",
- " 0.453426 | \n",
- " 0.211871 | \n",
+ " 0.590188 | \n",
+ " 0.940737 | \n",
+ " 0.826784 | \n",
" A | \n",
- " A1 | \n",
+ " -0.397329 | \n",
"
\n",
" \n",
" 4 | \n",
- " 0.328628 | \n",
- " 0.396641 | \n",
- " 0.041587 | \n",
+ " 0.739239 | \n",
+ " 0.175956 | \n",
+ " 0.304016 | \n",
" A | \n",
- " A1 | \n",
+ " 0.430950 | \n",
"
\n",
" \n",
" 5 | \n",
- " 0.481833 | \n",
- " 0.394005 | \n",
- " 0.503150 | \n",
+ " 0.708524 | \n",
+ " 0.960608 | \n",
+ " 0.286470 | \n",
" B | \n",
- " A1 | \n",
+ " 0.103200 | \n",
"
\n",
" \n",
" 6 | \n",
- " 0.430750 | \n",
- " 0.769627 | \n",
- " 0.838887 | \n",
+ " 0.851708 | \n",
+ " 0.004294 | \n",
+ " 0.302206 | \n",
" B | \n",
- " A1 | \n",
+ " 0.635292 | \n",
"
\n",
" \n",
" 7 | \n",
- " 0.882731 | \n",
- " 0.122181 | \n",
- " 0.393370 | \n",
+ " 0.309853 | \n",
+ " 0.954225 | \n",
+ " 0.954408 | \n",
" B | \n",
- " A1 | \n",
+ " -1.378318 | \n",
"
\n",
" \n",
" 8 | \n",
- " 0.622302 | \n",
- " 0.943480 | \n",
- " 0.715790 | \n",
+ " 0.535253 | \n",
+ " 0.212095 | \n",
+ " 0.627933 | \n",
" B | \n",
- " A1 | \n",
+ " -0.540699 | \n",
"
\n",
" \n",
" 9 | \n",
- " 0.419627 | \n",
- " 0.882003 | \n",
- " 0.629938 | \n",
+ " 0.998427 | \n",
+ " 0.934565 | \n",
+ " 0.602804 | \n",
" B | \n",
- " A1 | \n",
+ " 1.180524 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " A1 B1 C1 group A1_normed_by_group\n",
- "0 0.045894 0.093716 0.932221 A A1\n",
- "1 0.738293 0.249943 0.518687 A A1\n",
- "2 0.357182 0.454217 0.575472 A A1\n",
- "3 0.289010 0.453426 0.211871 A A1\n",
- "4 0.328628 0.396641 0.041587 A A1\n",
- "5 0.481833 0.394005 0.503150 B A1\n",
- "6 0.430750 0.769627 0.838887 B A1\n",
- "7 0.882731 0.122181 0.393370 B A1\n",
- "8 0.622302 0.943480 0.715790 B A1\n",
- "9 0.419627 0.882003 0.629938 B A1"
+ " A1 B1 C1 group A1_normed_by_group\n",
+ "0 0.897455 0.329248 0.190562 A 1.310156\n",
+ "1 0.411200 0.151263 0.204226 A -1.391970\n",
+ "2 0.670361 0.213199 0.398662 A 0.048193\n",
+ "3 0.590188 0.940737 0.826784 A -0.397329\n",
+ "4 0.739239 0.175956 0.304016 A 0.430950\n",
+ "5 0.708524 0.960608 0.286470 B 0.103200\n",
+ "6 0.851708 0.004294 0.302206 B 0.635292\n",
+ "7 0.309853 0.954225 0.954408 B -1.378318\n",
+ "8 0.535253 0.212095 0.627933 B -0.540699\n",
+ "9 0.998427 0.934565 0.602804 B 1.180524"
]
},
- "execution_count": 5,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -1577,7 +1577,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -1612,113 +1612,125 @@
" \n",
" \n",
" 0 | \n",
- " 0.045894 | \n",
- " 0.093716 | \n",
- " 0.932221 | \n",
+ " 0.897455 | \n",
+ " 0.329248 | \n",
+ " 0.190562 | \n",
" A | \n",
- " A1 | \n",
- " A1 | \n",
+ " 1.310156 | \n",
+ " 0.235766 | \n",
"
\n",
" \n",
" 1 | \n",
- " 0.738293 | \n",
- " 0.249943 | \n",
- " 0.518687 | \n",
+ " 0.411200 | \n",
+ " 0.151263 | \n",
+ " 0.204226 | \n",
" A | \n",
- " A1 | \n",
- " A1 | \n",
+ " -1.391970 | \n",
+ " -0.250488 | \n",
"
\n",
" \n",
" 2 | \n",
- " 0.357182 | \n",
- " 0.454217 | \n",
- " 0.575472 | \n",
+ " 0.670361 | \n",
+ " 0.213199 | \n",
+ " 0.398662 | \n",
" A | \n",
- " A1 | \n",
- " A1 | \n",
+ " 0.048193 | \n",
+ " 0.008672 | \n",
"
\n",
" \n",
" 3 | \n",
- " 0.289010 | \n",
- " 0.453426 | \n",
- " 0.211871 | \n",
+ " 0.590188 | \n",
+ " 0.940737 | \n",
+ " 0.826784 | \n",
" A | \n",
- " A1 | \n",
- " A1 | \n",
+ " -0.397329 | \n",
+ " -0.071500 | \n",
"
\n",
" \n",
" 4 | \n",
- " 0.328628 | \n",
- " 0.396641 | \n",
- " 0.041587 | \n",
+ " 0.739239 | \n",
+ " 0.175956 | \n",
+ " 0.304016 | \n",
" A | \n",
- " A1 | \n",
- " A1 | \n",
+ " 0.430950 | \n",
+ " 0.077551 | \n",
"
\n",
" \n",
" 5 | \n",
- " 0.481833 | \n",
- " 0.394005 | \n",
- " 0.503150 | \n",
+ " 0.708524 | \n",
+ " 0.960608 | \n",
+ " 0.286470 | \n",
" B | \n",
- " A1 | \n",
- " A1 | \n",
+ " 0.103200 | \n",
+ " 0.027771 | \n",
"
\n",
" \n",
" 6 | \n",
- " 0.430750 | \n",
- " 0.769627 | \n",
- " 0.838887 | \n",
+ " 0.851708 | \n",
+ " 0.004294 | \n",
+ " 0.302206 | \n",
" B | \n",
- " A1 | \n",
- " A1 | \n",
+ " 0.635292 | \n",
+ " 0.170955 | \n",
"
\n",
" \n",
" 7 | \n",
- " 0.882731 | \n",
- " 0.122181 | \n",
- " 0.393370 | \n",
+ " 0.309853 | \n",
+ " 0.954225 | \n",
+ " 0.954408 | \n",
" B | \n",
- " A1 | \n",
- " A1 | \n",
+ " -1.378318 | \n",
+ " -0.370900 | \n",
"
\n",
" \n",
" 8 | \n",
- " 0.622302 | \n",
- " 0.943480 | \n",
- " 0.715790 | \n",
+ " 0.535253 | \n",
+ " 0.212095 | \n",
+ " 0.627933 | \n",
" B | \n",
- " A1 | \n",
- " A1 | \n",
+ " -0.540699 | \n",
+ " -0.145500 | \n",
"
\n",
" \n",
" 9 | \n",
- " 0.419627 | \n",
- " 0.882003 | \n",
- " 0.629938 | \n",
+ " 0.998427 | \n",
+ " 0.934565 | \n",
+ " 0.602804 | \n",
" B | \n",
- " A1 | \n",
- " A1 | \n",
+ " 1.180524 | \n",
+ " 0.317675 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " A1 B1 C1 group A1_normed_by_group A1_centered_by_group\n",
- "0 0.045894 0.093716 0.932221 A A1 A1\n",
- "1 0.738293 0.249943 0.518687 A A1 A1\n",
- "2 0.357182 0.454217 0.575472 A A1 A1\n",
- "3 0.289010 0.453426 0.211871 A A1 A1\n",
- "4 0.328628 0.396641 0.041587 A A1 A1\n",
- "5 0.481833 0.394005 0.503150 B A1 A1\n",
- "6 0.430750 0.769627 0.838887 B A1 A1\n",
- "7 0.882731 0.122181 0.393370 B A1 A1\n",
- "8 0.622302 0.943480 0.715790 B A1 A1\n",
- "9 0.419627 0.882003 0.629938 B A1 A1"
+ " A1 B1 C1 group A1_normed_by_group \\\n",
+ "0 0.897455 0.329248 0.190562 A 1.310156 \n",
+ "1 0.411200 0.151263 0.204226 A -1.391970 \n",
+ "2 0.670361 0.213199 0.398662 A 0.048193 \n",
+ "3 0.590188 0.940737 0.826784 A -0.397329 \n",
+ "4 0.739239 0.175956 0.304016 A 0.430950 \n",
+ "5 0.708524 0.960608 0.286470 B 0.103200 \n",
+ "6 0.851708 0.004294 0.302206 B 0.635292 \n",
+ "7 0.309853 0.954225 0.954408 B -1.378318 \n",
+ "8 0.535253 0.212095 0.627933 B -0.540699 \n",
+ "9 0.998427 0.934565 0.602804 B 1.180524 \n",
+ "\n",
+ " A1_centered_by_group \n",
+ "0 0.235766 \n",
+ "1 -0.250488 \n",
+ "2 0.008672 \n",
+ "3 -0.071500 \n",
+ "4 0.077551 \n",
+ "5 0.027771 \n",
+ "6 0.170955 \n",
+ "7 -0.370900 \n",
+ "8 -0.145500 \n",
+ "9 0.317675 "
]
},
- "execution_count": 6,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -1726,6 +1738,13 @@
"source": [
"new_df.norm_by_group('group', 'A1', scale=False)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/mkdocs.yml b/mkdocs.yml
index 2cbed0e..e204349 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -25,12 +25,14 @@ nav:
- I/O: api/io.md
- Function ops: api/ops.md
- Maps: api/maps.md
+ - Generators: api/generators.md
- Shorthands: api/shorts.md
- Pipes: api/pipes.md
- Dataframe verbs and tools: api/df.md
- Plotting tools: api/plot.md
- Boilerplate helpers: api/boilerplate.md
- Decorators: api/decorators.md
+ - Dataclasses: api/data.md
plugins:
- mkdocs-jupyter:
ignore: ["dev/*"]
diff --git a/utilz/__init__.py b/utilz/__init__.py
index 85d7224..4f858f3 100644
--- a/utilz/__init__.py
+++ b/utilz/__init__.py
@@ -8,3 +8,5 @@
from .boilerplate import *
from .stats import *
from .pipes import *
+from .data import *
+from .genz import *
diff --git a/utilz/data.py b/utilz/data.py
new file mode 100644
index 0000000..1d308d4
--- /dev/null
+++ b/utilz/data.py
@@ -0,0 +1,136 @@
+"""
+Data containers
+"""
+
+from .maps import map
+
+
+class Box(list):
+    """
+    Box is a flexible list-like container that allows for dot-notation access to the attributes and methods of its elements. This makes it easy for example to perform a `.groupby()` operation on a list of dataframes.
+
+    Boxes can be transparent or opaque. Transparent boxes always return the result of an operation as a list. This is useful for example when you want to call a method on each box element and immediately work with the results.
+
+    Opaque boxes always return a new `Box` whose contents can be accessed using `.contents()` or slice notation `box[:]`. This allows for method chaining on the underlying data.
+
+    Attribute forwarding is decided by the first element, so a box is expected to hold elements of a single type. An empty box forwards nothing, and special (dunder) names are never forwarded so that `copy` and `pickle` keep working.
+
+    Examples:
+        >>> # Transparent box by default
+        >>> box = Box([df1, df2, df3])
+
+        >>> # Access content like a list
+        >>> box[0]  # returns df1
+        >>> box[:]  # returns [df1, df2, df3]
+        >>> box.contents()  # equivalent
+
+        >>> # Access attributes or call methods just like you would on a single object
+        >>> box.head(10)  # returns a list of each df's head
+        >>> box.shape  # returns a list of each df's shape
+
+        >>> # Opaque box facilitates method chaining but needs
+        >>> # `.contents()` to access results
+        >>> black_box = Box([df1, df2, df3], transparent=False)
+        >>> black_box.groupby('col').mean().contents()
+
+        >>> # Apply arbitrary functions to box elements
+        >>> result = box.map(lambda x: x + 1)
+
+        >>> # Can also modify in place without returning anything
+        >>> box.map(lambda x: x + 1, inplace=True)
+
+        >>> # Map respects box transparency for method chaining
+        >>> box.set_transparent(False)
+        >>> result = box.map(lambda x: x + 1).head().contents()
+    """
+
+    def __init__(self, iterable, transparent=True):
+        """
+        Create a new box from an iterable
+
+        Args:
+            iterable (iterable): objects to store in the box
+            transparent (bool): whether methods should return results (`True`) or a new box (`False`); Default True
+        """
+        super().__init__(iterable)
+        self._transparent_box = transparent
+
+    def _wrap(self, out):
+        """Return `out` as-is for a transparent box, otherwise re-box it"""
+        if self._transparent_box:
+            return out
+        return Box(out, transparent=self._transparent_box)
+
+    def __getattr__(self, name, *args, **kwargs):
+        # Only reached when normal lookup on the box itself fails. `hasattr()`,
+        # `copy` and `pickle` probe for optional attributes and only tolerate
+        # AttributeError, so nothing else may escape from here.
+        is_dunder = name.startswith("__") and name.endswith("__")
+        # `len(self) == 0` also covers the empty box that `copy` builds before
+        # its items and `_transparent_box` are restored
+        if is_dunder or len(self) == 0 or not hasattr(self[0], name):
+            raise AttributeError(
+                f"'{type(self).__name__}' object and its elements have no attribute '{name}'"
+            )
+
+        # The first element decides whether `name` is a method or a plain attribute
+        if callable(getattr(self[0], name)):
+
+            def fn(*args, **kwargs):
+                out = []
+                for elem in self:
+                    result = getattr(elem, name)
+                    # Tolerate elements where `name` is a plain attribute
+                    result = result(*args, **kwargs) if callable(result) else result
+                    out.append(result)
+                return self._wrap(out)
+
+            return fn
+
+        return self._wrap([getattr(elem, name) for elem in self])
+
+    def __repr__(self):
+        # An empty box has no first element to report the type of
+        if len(self) == 0:
+            elem_type = None
+        else:
+            cls = self[0].__class__
+            elem_type = f"{cls.__module__}.{cls.__name__}"
+        return f"Box(len={len(self)}, transparent={self._transparent_box}, type={elem_type})"
+
+    def map(self, fn, inplace=False):
+        """
+        Apply a function to each element in the box
+
+        Args:
+            fn (callable): function to apply to each element
+            inplace (bool, optional): whether to replace the box contents with the results instead of returning them. Defaults to False.
+
+        Returns:
+            list, Box, or None: the results as a list (transparent box) or a new `Box` (opaque box); `None` when `inplace=True`
+        """
+
+        out = map(fn, self)
+        if inplace:
+            # Slice assignment swaps the contents while keeping the box's transparency
+            self[:] = out
+            return None
+        return self._wrap(out)
+
+    def contents(self):
+        """
+        Convert box to list
+
+        Returns:
+            list: list of elements
+        """
+        return list(self)
+
+    def set_transparent(self, transparent):
+        """
+        Set the transparency of the box
+
+        Args:
+            transparent (bool): whether the box should be transparent or not
+        """
+        self._transparent_box = transparent
diff --git a/utilz/dev/data.py b/utilz/dev/data.py
new file mode 100644
index 0000000..a41305f
--- /dev/null
+++ b/utilz/dev/data.py
@@ -0,0 +1,115 @@
+# %%
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from dataclasses import dataclass
+from functools import cached_property
+from utilz import randdf, equal
+from shutil import rmtree
+from utilz import discard
+
+
+# %% Subject data container
+@dataclass
+class Subject:
+    """Data container whose `data` is loaded lazily and cached with the cached_property decorator"""
+
+    sid: str  # subject id; also the name of the subject's sub-folder
+    data_dir: Path  # parent folder on input; rewritten to `<data_dir>/<sid>` after init
+
+    def __post_init__(self):
+        self.data_dir = self.data_dir / self.sid
+
+    @cached_property
+    def data(self):
+        print("Loading data")  # only printed on a cache miss
+        return pd.read_csv(self.data_dir / "data.csv")
+
+    def clear_cache(self):
+        self.__dict__.pop("data", None)  # unlike `del self.data`, safe when nothing is cached yet
+
+
+# %% Subject data test
+def test_subject():
+    # Setup data folders
+    DATA_DIR = Path(".")
+    SID = "sid001"
+    SUB_PATH = DATA_DIR / SID
+    SUB_PATH.mkdir(exist_ok=True)
+    FNAME = "data.csv"
+    try:
+        # Make data
+        data = randdf()
+        data.to_csv(SUB_PATH / FNAME, index=False)
+
+        # Create subject
+        s = Subject(SID, DATA_DIR)
+
+        assert equal(s.data, data)
+
+        # Change data on disk
+        data = randdf()
+        data.to_csv(SUB_PATH / FNAME, index=False)
+
+        # Second time it's cached
+        assert not equal(s.data, data)
+
+        # Clear cache
+        s.clear_cache()
+
+        # Third time it's not cached
+        assert equal(s.data, data)
+    finally:
+        # Remove data even when an assertion above fails
+        rmtree(SUB_PATH)
+
+
+# %% Box data container
+
+
+class Box(list):
+    """Prototype list container that forwards attribute access to its elements"""
+
+    def __init__(self, iterable):
+        super().__init__(iterable)
+
+    def __getattr__(self, name, *args, **kwargs):
+        # Dunder probes (copy/pickle) and empty boxes must fail with AttributeError,
+        # the only error `hasattr()` and friends tolerate
+        is_dunder = name.startswith("__") and name.endswith("__")
+        if is_dunder or len(self) == 0 or not hasattr(self[0], name):
+            raise AttributeError(name)
+        # The first element decides whether `name` is a method or a plain attribute
+        if not callable(getattr(self[0], name)):
+            return [getattr(elem, name) for elem in self]
+
+        def fn(*args, **kwargs):
+            fetched = (getattr(elem, name) for elem in self)
+            return [f(*args, **kwargs) if callable(f) else f for f in fetched]
+
+        return fn
+
+    def __repr__(self):
+        cls = self[0].__class__ if len(self) else type(None)
+        return f"Box(len={len(self)}, type={cls.__module__}.{cls.__name__})"
+
+
+# %%
+# Mock class that has attribute access to its underlying data
+df = randdf()
+df.data = df.to_numpy()
+b = Box([df, df])
+b
+
+
+# %%
+b.data
+b.head()
+
+# %%
+box = Box([np.random.randn(10) for i in range(10)])
+
+# %% Run Tests
+test_subject()
+
+# %%
diff --git a/utilz/dftools.py b/utilz/dftools.py
index e18f313..589833d 100644
--- a/utilz/dftools.py
+++ b/utilz/dftools.py
@@ -79,9 +79,10 @@ def _norm(dat, center, scale):
elif center and scale:
idx = "normed"
+ out = out.to_dict()
assign_dict = {}
- for valcol, col in zip(valcols, out):
- assign_dict[f"{valcol}_{idx}_by_{grpcol}"] = col
+ for key in out.keys():
+ assign_dict[f"{key}_{idx}_by_{grpcol}"] = out[key]
out = df.assign(**assign_dict)
return out.squeeze()
diff --git a/utilz/genz.py b/utilz/genz.py
new file mode 100644
index 0000000..5bcd389
--- /dev/null
+++ b/utilz/genz.py
@@ -0,0 +1,80 @@
+"""
+Tools for working with generators. Also see the `toolz` library!
+"""
+
+
+def make_gen(iterable):
+    """Turn any iterable into a generator"""
+    return (e for e in iterable)
+
+
+def combine_gens(*iterables):
+    """
+    Combine multiple generators into a single generator based on every
+    unique combination of their elements. This is equivalent to a series of
+    nested for loops just like itertools.product(). But unlike itertools.product
+    doesn't exhaust each generator before combining them: items are only pulled
+    from an input when a combination needs them.
+
+    One-shot iterators (e.g. generators) in any position but the first are
+    remembered as they are consumed so they can be replayed for every item of
+    the iterables before them. Re-iterable containers are simply iterated again.
+
+    Examples:
+        >>> # This
+        >>> for aa in a:
+        >>>     for bb in b:
+        >>>         for cc in c:
+        >>>             func(aa, bb, cc)
+
+        >>> # Becomes this
+        >>> for aa, bb, cc in combine_gens(a, b, c):
+        >>>     func(aa, bb, cc)
+
+    Args:
+        *iterables: any number of iterables or generators to combine
+
+    Yields:
+        tuple: one element from each input, in nested-loop order
+    """
+
+    def replayable(iterable):
+        # Return a zero-argument callable giving a fresh pass over `iterable`
+        source = iter(iterable)
+        if source is not iterable:
+            # Containers (lists, ranges, ...) hand out a new iterator every time
+            return lambda: iter(iterable)
+
+        # A one-shot iterator can only be walked once, but an inner "loop" runs
+        # once per item of the loops outside it: cache items as they are pulled
+        seen = []
+
+        def walk():
+            idx = 0
+            while True:
+                if idx == len(seen):
+                    try:
+                        seen.append(next(source))
+                    except StopIteration:
+                        return
+                yield seen[idx]
+                idx += 1
+
+        return walk
+
+    def product(walkers):
+        if not walkers:
+            # Base case: nothing left to combine, yield an empty tuple.
+            yield ()
+            return
+        # Recursive case: combine the first input with combinations from the rest.
+        first, *rest = walkers
+        for item in first():
+            for sub_combination in product(rest):
+                yield (item,) + sub_combination
+
+    walkers = [replayable(gen) for gen in iterables[1:]]
+    if iterables:
+        # The outermost input is only ever walked once, so it needs no cache
+        walkers.insert(0, lambda: iterables[0])
+    yield from product(walkers)
diff --git a/utilz/plot.py b/utilz/plot.py
index 7d19bf8..389df67 100644
--- a/utilz/plot.py
+++ b/utilz/plot.py
@@ -79,6 +79,7 @@ def stripbarplot(
if pointcolor == "hue":
ax = sns.stripplot(*args, **kwargs, data=data, ax=ax, alpha=alpha)
else:
+ _ = kwargs.pop("palette", None)
ax = sns.stripplot(
*args, **kwargs, color=pointcolor, data=data, ax=ax, alpha=alpha
)
@@ -132,6 +133,7 @@ def savefig(
vector: bool = True,
use_subdirs: bool = True,
raster_extension: str = "jpg",
+ vector_extension: str = "svg",
bbox_inches: str = "tight",
overwrite: bool = True,
**kwargs,
@@ -163,10 +165,10 @@ def savefig(
path = Path.cwd()
if use_subdirs:
raster_path = path / "raster" / f"{name}.{raster_extension}"
- vector_path = path / "vector" / f"{name}.pdf"
+ vector_path = path / "vector" / f"{name}.{vector_extension}"
else:
raster_path = path / f"{name}.{raster_extension}"
- vector_path = path / f"{name}.pdf"
+ vector_path = path / f"{name}.{vector_extension}"
if not raster_path.parent.exists():
raster_path.parent.mkdir()
if not vector_path.parent.exists():
diff --git a/utilz/shorts.py b/utilz/shorts.py
index b7e0eae..a43dc8e 100644
--- a/utilz/shorts.py
+++ b/utilz/shorts.py
@@ -42,23 +42,23 @@ def discard(*args, **kwargs):
return filter(*args, invert=invert, **kwargs)
-def seq(n):
-    """Enumerated `list`"""
-    return list(range(n))
+def seq(*args):
+    """Shorthand for `list(range(*args))`; takes the same start/stop/step arguments as `range`"""
+    return list(range(*args))
def equal(*seqs):
"""
Checks if N args of potentionally different lengths are equal.
Non-iterable args are directly compared with `==`
- Dataframes and arrays use `.equals()` and `np.allclose()` respectively
+ Dataframes and arrays both use `np.allclose()` for comparison
"""
if not isinstance(seqs[0], Iterable):
return checkall(lambda e: e == seqs[0], seqs)
if isinstance(seqs[0], pd.DataFrame):
- return checkall(lambda e: e.equals(seqs[0]), seqs)
+ return checkall(lambda e: np.allclose(e.to_numpy(), seqs[0].to_numpy()), seqs)
if isinstance(seqs[0], np.ndarray):
return checkall(lambda e: np.allclose(e, seqs[0]), seqs)
diff --git a/utilz/tests/test_data.py b/utilz/tests/test_data.py
new file mode 100644
index 0000000..85e9e3c
--- /dev/null
+++ b/utilz/tests/test_data.py
@@ -0,0 +1,102 @@
+import numpy as np
+import pandas as pd
+from utilz.data import Box
+from utilz import randdf, equal, map, seq
+import pytest
+
+
+def test_box():
+    # Dataframe data
+    df_data = [
+        randdf((20, 3), groups={"condition": 2, "group": 4}),
+        randdf((20, 3), groups={"condition": 2, "group": 4}),
+    ]
+
+    box = Box(df_data)
+
+    # We can get the data in a box by slicing it or using .contents()
+    assert box[0].equals(df_data[0])
+    assert all([x.equals(y) for x, y in zip(box[:], df_data)])
+    assert all([x.equals(y) for x, y in zip(box.contents(), df_data)])
+
+    # By default boxes are transparent and always return the contents of an operation
+
+    # Method access
+    out = box.head()
+    assert isinstance(out, list)
+
+    correct = map(lambda x: x.head(), df_data)
+    assert all([x.equals(y) for x, y in zip(out, correct)])
+
+    # Attribute access
+    out = box.shape
+    assert isinstance(out, list)
+
+    correct = map(lambda x: x.shape, df_data)
+    assert all(map(lambda tup: equal(*tup), zip(out, correct)))
+
+    # Numpy arrays
+    data = [np.random.randn(10) for i in range(10)]
+    box = Box(data)
+
+    # Method access
+    out = box.mean()
+    assert isinstance(out, list)
+
+    correct = map(lambda x: x.mean(), data)
+    assert all(map(lambda tup: equal(*tup), zip(out, correct)))
+
+    # Attribute access
+    out = box.shape
+    assert isinstance(out, list)
+
+    correct = map(lambda x: x.shape, data)
+    assert all(map(lambda tup: equal(*tup), zip(out, correct)))
+
+    # Opaque boxes return a new box whose contents can be accessed using .contents()
+    black_box = Box(df_data, transparent=False)
+    assert isinstance(black_box.head(), Box)
+    assert isinstance(black_box.head().contents(), list)
+    # slice notation works too
+    assert isinstance(black_box.head()[:], list)
+
+    # Opaque boxes are useful for method chaining on underlying data
+    out = black_box.groupby("group").mean().contents()
+    assert isinstance(out, list)
+
+    # Chaining doesn't work on a transparent box: `.groupby()` returns a plain list
+    box = Box(df_data)
+    with pytest.raises(AttributeError):
+        box.groupby("group").mean()
+
+    # We can change transparency on the fly
+    box.set_transparent(False)
+    compare = box.groupby("group").mean().contents()
+    assert all([x.equals(y) for x, y in zip(out, compare)])
+
+    # Applying arbitrary functions to box elements
+    data = seq(10)
+    correct = seq(1, 11)
+    box = Box(data)
+
+    # By default map returns the result of the operation just like calling other
+    # attributes or methods
+    result = box.map(lambda x: x + 1)
+    assert isinstance(result, list)
+    assert equal(result, correct)
+
+    # Maps respect box transparency which is useful for method chaining
+    box = Box(df_data, transparent=False)
+    result = box.map(lambda x: x["A1"] + 1).head().contents()
+    assert isinstance(result, list)
+
+    # Map operations can also happen inplace which will change box contents without
+    # returning anything.
+    box = Box(data)
+    box.map(lambda x: x + 1, inplace=True)
+    assert equal(box.contents(), correct)
+
+    # In place doesn't care if the box is transparent or not.
+    box = Box(data, transparent=False)
+    box.map(lambda x: x + 1, inplace=True)
+    assert equal(box.contents(), correct)
diff --git a/utilz/tests/test_genz.py b/utilz/tests/test_genz.py
new file mode 100644
index 0000000..61978bb
--- /dev/null
+++ b/utilz/tests/test_genz.py
@@ -0,0 +1,20 @@
+from types import GeneratorType
+from utilz.genz import make_gen, combine_gens
+import pytest
+
+
+def test_make_gen():
+    """make_gen wraps an iterable in a generator that yields the same items"""
+    items = list(range(10))
+    gen = make_gen(items)
+    assert isinstance(gen, GeneratorType) and list(gen) == items
+
+
+def test_combine_gens():
+    """Combining four lists yields their full Cartesian product in nested-loop order"""
+    sizes = (10, 5, 3, 20)
+    lists = [list(range(size)) for size in sizes]
+    combos = list(combine_gens(*lists))
+    # One tuple per unique combination of elements
+    assert len(combos) == 10 * 5 * 3 * 20
+    assert combos[-1] == (9, 4, 2, 19)
diff --git a/utilz/tests/test_ops.py b/utilz/tests/test_ops.py
index 7252617..0490216 100644
--- a/utilz/tests/test_ops.py
+++ b/utilz/tests/test_ops.py
@@ -23,7 +23,6 @@
discard,
seq,
equal,
- fork,
)
from utilz.plot import tweak
from utilz.boilerplate import randdf
diff --git a/utilz/tests/test_plot.py b/utilz/tests/test_plot.py
index f2391c7..01af78c 100644
--- a/utilz/tests/test_plot.py
+++ b/utilz/tests/test_plot.py
@@ -28,7 +28,7 @@ def test_savefig(tmp_path: Path):
# Save to cwd
f, _ = mpinit(subplots=(2, 2))
save_raster = Path.cwd() / "raster" / "test.jpg"
- save_vector = Path.cwd() / "vector" / "test.pdf"
+ save_vector = Path.cwd() / "vector" / "test.svg"
savefig(f, "test")
plt.close(f)
assert save_raster.exists()
@@ -41,7 +41,7 @@ def test_savefig(tmp_path: Path):
# Save to custom path
tmp_path = Path(tmp_path)
dir_save_raster = tmp_path / "test.jpg"
- dir_save_vector = tmp_path / "test.pdf"
+ dir_save_vector = tmp_path / "test.svg"
f, _ = mpinit(subplots=(2, 2))
savefig(f, "test", path=tmp_path, use_subdirs=False)