Commit 081ac7c
Merge branch 'pr/1.6' into pr/1.6_to_main
mpvanderschelling authored Aug 15, 2024
2 parents a01ca06 + f27235a commit 081ac7c
Showing 22 changed files with 4,191 additions and 399 deletions.
1 change: 1 addition & 0 deletions .coveragerc
@@ -5,6 +5,7 @@ source = src
omit =
tests/*
**/__init__.py
src/f3dasm/_src/experimentdata/_experimental/*

[report]
# Regexes for lines to exclude from consideration
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
1.5.3
1.6.0
4 changes: 2 additions & 2 deletions docs/source/conf.py
@@ -27,8 +27,8 @@
project = 'f3dasm'
author = 'Martin van der Schelling'
copyright = '2024, Martin van der Schelling'
version = '1.5.3'
release = '1.5.3'
version = '1.6.0'
release = '1.6.0'


# -- General configuration ----------------------------------------------------
2 changes: 1 addition & 1 deletion src/f3dasm/__version__.py
@@ -1 +1 @@
__version__: str = "1.5.3"
__version__: str = "1.6.0"
63 changes: 49 additions & 14 deletions src/f3dasm/_src/design/domain.py
@@ -14,7 +14,7 @@
from dataclasses import dataclass, field
from pathlib import Path
from typing import (Any, Dict, Iterable, Iterator, List, Literal, Optional,
Sequence, Type)
Protocol, Sequence, Type)

# Third-party core
import numpy as np
@@ -36,6 +36,13 @@
# =============================================================================


class _Data(Protocol):
def to_dataframe(self) -> pd.DataFrame:
...

# =============================================================================


@dataclass
class Domain:
"""Main class for defining the domain of the design of experiments.
@@ -238,19 +245,49 @@ def from_dataframe(cls, df_input: pd.DataFrame,

return cls(space=input_space, output_space=output_space)

@classmethod
def from_data(cls: Type[Domain],
input_data: _Data, output_data: _Data) -> Domain:
"""Initializes a Domain from input and output data.
Parameters
----------
input_data : _Data
Input data.
output_data : _Data
Output data.
Returns
-------
Domain
Domain object
"""
return cls.from_dataframe(
input_data.to_dataframe(), output_data.to_dataframe())

# Export
# =============================================================================

def store(self, filename: Path) -> None:
def store(self, filename: Path, create_tmp: bool = False) -> None:
"""Stores the Domain in a pickle file.
Parameters
----------
filename : Path
Name of the file.
create_tmp : bool, optional
If True, the Domain is first written to a .tmp file, which is then
renamed to the final .pkl (defaults to False).
"""
with open(filename.with_suffix('.pkl'), 'wb') as f:
pickle.dump(self, f)
if create_tmp:
with open(filename.with_suffix('.tmp'), 'wb') as f:
pickle.dump(self, f)

# remove old file if it exists
filename.with_suffix('.pkl').unlink(missing_ok=True)

# rename the file to the correct extension
filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl'))
else:
with open(filename.with_suffix('.pkl'), 'wb') as f:
pickle.dump(self, f)

def _cast_types_dataframe(self) -> dict:
"""Make a dictionary that provides the datatype of each parameter"""
@@ -645,9 +682,7 @@ def make_nd_continuous_domain(bounds: np.ndarray | List[List[float]],
return Domain(space)


def _domain_factory(domain: Domain | DictConfig | None,
input_data: pd.DataFrame,
output_data: pd.DataFrame) -> Domain:
def _domain_factory(domain: Domain | DictConfig | str | Path) -> Domain:
if isinstance(domain, Domain):
return domain

@@ -657,14 +692,14 @@ def _domain_factory(domain: Domain | DictConfig | None,
elif isinstance(domain, DictConfig):
return Domain.from_yaml(domain)

elif (input_data.empty and output_data.empty and domain is None):
return Domain()
# elif (input_data.empty and output_data.empty and domain is None):
# return Domain()

elif domain is None:
return Domain.from_dataframe(
input_data, output_data)
# elif domain is None:
# return Domain.from_dataframe(
# input_data, output_data)

else:
raise TypeError(
f"Domain must be of type Domain, DictConfig "
f"or None, not {type(domain)}")
f"Domain must be of type Domain, DictConfig, str or Path, "
f"not {type(domain)}")
9 changes: 8 additions & 1 deletion src/f3dasm/_src/experimentdata/_columns.py
@@ -18,7 +18,7 @@
from __future__ import annotations

# Standard
from typing import Dict, List, Optional
from typing import Dict, Iterable, List, Optional

# Authorship & Credits
# =============================================================================
@@ -123,3 +123,10 @@ def rename(self, old_name: str, new_name: str):
name of the column to replace with
"""
self.columns[new_name] = self.columns.pop(old_name)

def set_columnnames(self, names: Iterable[str]) -> None:
for old_name, new_name in zip(self.names, names):
self.rename(old_name, new_name)

def has_columnnames(self, names: Iterable[str]) -> bool:
return set(names).issubset(self.names)
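
The two methods added to _Columns above rename columns positionally (by zipping the current names with the new ones) and test whether a given set of names is already present. A small self-contained illustration of that behaviour, using a simplified Columns stand-in rather than the real _Columns class:

from typing import Dict, Iterable, List


class Columns:
    """Simplified stand-in for _Columns: an ordered name -> info mapping."""

    def __init__(self, columns: Dict[str, None]):
        self.columns = columns

    @property
    def names(self) -> List[str]:
        return list(self.columns.keys())

    def rename(self, old_name: str, new_name: str) -> None:
        self.columns[new_name] = self.columns.pop(old_name)

    def set_columnnames(self, names: Iterable[str]) -> None:
        # Positional rename: the i-th existing column gets the i-th new name.
        for old_name, new_name in zip(self.names, names):
            self.rename(old_name, new_name)

    def has_columnnames(self, names: Iterable[str]) -> bool:
        return set(names).issubset(self.names)


cols = Columns({'x0': None, 'x1': None})
cols.set_columnnames(['a', 'b'])
print(cols.names)                   # ['a', 'b']
print(cols.has_columnnames(['a']))  # True
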
130 changes: 23 additions & 107 deletions src/f3dasm/_src/experimentdata/_data.py
@@ -186,7 +186,8 @@ def from_file(cls, filename: Path | str) -> _Data:
return cls(df, columns=_Columns(_columns))

@classmethod
def from_numpy(cls: Type[_Data], array: np.ndarray) -> _Data:
def from_numpy(cls: Type[_Data],
array: np.ndarray, keys: Iterable[str]) -> _Data:
"""Loads the data from a numpy array.
Parameters
@@ -209,26 +210,6 @@ def from_dataframe(cls, dataframe: pd.DataFrame) -> _Data:
_columns = {name: None for name in dataframe.columns.to_list()}
return cls(dataframe, columns=_Columns(_columns))

def reset(self, domain: Optional[Domain] = None):
"""Resets the data to the initial state.
Parameters
----------
domain : Domain, optional
The domain of the experiment.
Note
----
If the domain is None, the data will be reset to an empty dataframe.
"""

if domain is None:
self.data = pd.DataFrame()
self.columns = _Columns()
else:
self.data = self.from_domain(domain).data
self.columns = self.from_domain(domain).columns

# Export
# =============================================================================

@@ -270,32 +251,7 @@ def to_dataframe(self) -> pd.DataFrame:
df.columns = self.names
return df.astype(object)

def combine_data_to_multiindex(self, other: _Data,
jobs_df: pd.DataFrame) -> pd.DataFrame:
"""Combine the data to a multiindex dataframe.
Parameters
----------
other : _Data
The other data to combine.
jobs : pd.DataFrame
The jobs dataframe.
Returns
-------
pd.DataFrame
The combined dataframe.
Note
----
This function is mainly used to show the combined ExperimentData
object in a Jupyter Notebook
"""
return pd.concat([jobs_df, self.to_dataframe(),
other.to_dataframe()],
axis=1, keys=['jobs', 'input', 'output'])

def store(self, filename: Path) -> None:
def store(self, filename: Path, create_tmp: bool = False) -> None:
"""Stores the data to a file.
Parameters
@@ -307,8 +263,19 @@ def store(self, filename: Path) -> None:
----
The data is stored as a csv file.
"""
# TODO: The column information is not saved in the .csv!
self.to_dataframe().to_csv(filename.with_suffix('.csv'))

if create_tmp:
self.to_dataframe().to_csv(filename.with_suffix('.tmp'))

# remove the old file if it exists
filename.with_suffix('.csv').unlink(missing_ok=True)

# rename the file to the correct extension
filename.with_suffix('.tmp').rename(filename.with_suffix('.csv'))

else:
# TODO: The column information is not saved in the .csv!
self.to_dataframe().to_csv(filename.with_suffix('.csv'))

def n_best_samples(self, nosamples: int,
column_name: List[str] | str) -> pd.DataFrame:
@@ -351,6 +318,7 @@ def select_columns(self, columns: Iterable[str] | str) -> _Data:
return _Data(
self.data[self.columns.iloc(columns)], columns=_selected_columns)

# TODO: Can we get rid of this method ?
def drop(self, columns: Iterable[str] | str) -> _Data:
"""Drop the selected columns from the data.
@@ -377,33 +345,6 @@ def drop(self, columns: Iterable[str] | str) -> _Data:
# Append and remove data
# =============================================================================

def add(self, data: pd.DataFrame):
try:
last_index = self.data.index[-1]
except IndexError: # Empty dataframe
self.data = data
return

new_indices = pd.RangeIndex(
start=last_index + 1, stop=last_index + len(data) + 1, step=1)

# set the indices of the data to new_indices
data.index = new_indices

self.data = pd.concat([self.data, data], ignore_index=False)

def add_empty_rows(self, number_of_rows: int):
if self.data.index.empty:
last_index = -1
else:
last_index = self.data.index[-1]

new_indices = pd.RangeIndex(
start=last_index + 1, stop=last_index + number_of_rows + 1, step=1)
empty_data = pd.DataFrame(
np.nan, index=new_indices, columns=self.data.columns)
self.data = pd.concat([self.data, empty_data], ignore_index=False)

def add_column(self, name: str, exist_ok: bool = False):
if name in self.columns.names:
if not exist_ok:
@@ -423,9 +364,6 @@ def add_column(self, name: str, exist_ok: bool = False):
def remove(self, indices: List[int]):
self.data = self.data.drop(indices)

def round(self, decimals: int):
self.data = self.data.round(decimals=decimals)

def overwrite(self, indices: Iterable[int], other: _Data | Dict[str, Any]):
if isinstance(other, Dict):
other = _convert_dict_to_data(other)
@@ -436,6 +374,7 @@ def overwrite(self, indices: Iterable[int], other: _Data | Dict[str, Any]):

self.data.update(other.data.set_index(pd.Index(indices)))

# TODO: Rename this method, it is not clear what it does
def join(self, __o: _Data) -> _Data:
"""Join two Data objects together.
@@ -455,6 +394,7 @@ def join(self, __o: _Data) -> _Data:
# Getters and setters
# =============================================================================

# TODO: Rename this method ? It is not clear what it does
def get_data_dict(self, index: int) -> Dict[str, Any]:
return self.to_dataframe().loc[index].to_dict()

@@ -510,31 +450,6 @@ def get_index_with_nan(self) -> pd.Index:
"""
return self.indices[self.data.isna().any(axis=1)]

def has_columnnames(self, names: Iterable[str]) -> bool:
return set(names).issubset(self.names)

def set_columnnames(self, names: Iterable[str]) -> None:
for old_name, new_name in zip(self.names, names):
self.columns.rename(old_name, new_name)

def cast_types(self, domain: Domain):
"""Cast the types of the data to the types of the domain.
Parameters
----------
domain : Domain
The domain with specific parameters to cast the types to.
Raises
------
ValueError
If the types of the domain and the data do not match.
"""
_dtypes = {index: parameter._type
for index, (_, parameter) in enumerate(
domain.space.items())}
self.data = self.data.astype(_dtypes)


def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data:
"""Converts a dictionary with scalar values to a data object.
@@ -555,7 +470,8 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data:
return _Data(data=df, columns=_Columns(_columns))


def _data_factory(data: DataTypes) -> _Data:
def _data_factory(data: DataTypes,
keys: Optional[Iterable[str]] = None) -> _Data:
if data is None:
return _Data()

@@ -566,10 +482,10 @@ def _data_factory(data: DataTypes) -> _Data:
return _Data.from_dataframe(data)

elif isinstance(data, (Path, str)):
return _Data.from_file(data)
return _Data.from_file(Path(data))

elif isinstance(data, np.ndarray):
return _Data.from_numpy(data)
return _Data.from_numpy(data, keys=keys)

else:
raise TypeError(
(The remainder of this diff and the other changed files are not shown.)
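
Both Domain.store and _Data.store in this commit gain the same optional create_tmp branch: serialise to a .tmp file first, remove the old target, then rename the temporary file into place, so the target is never left half-written if the process dies mid-write; with create_tmp=False the old single-step write is kept. Below is a standalone sketch of that write-then-rename pattern, shown with pickle as in Domain.store (_Data.store does the same with a .csv); the store_via_tmp helper is hypothetical, not f3dasm code.

import pickle
from pathlib import Path


def store_via_tmp(obj, filename: Path, suffix: str = '.pkl') -> None:
    """Hypothetical helper mirroring the create_tmp=True branch."""
    tmp = filename.with_suffix('.tmp')
    final = filename.with_suffix(suffix)

    # 1. Serialise to a temporary file; the real target is untouched so far.
    with open(tmp, 'wb') as f:
        pickle.dump(obj, f)

    # 2. Remove the previous version of the target, if any.
    final.unlink(missing_ok=True)

    # 3. Move the fully written temporary file into place.
    tmp.rename(final)


store_via_tmp({'x0': [0.0, 1.0]}, Path('domain_example'))
with open('domain_example.pkl', 'rb') as f:
    print(pickle.load(f))
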
