From 197133d438aa27d590585a991f87145c11c6c33a Mon Sep 17 00:00:00 2001
From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com>
Date: Thu, 20 Jun 2024 09:49:13 +0200
Subject: [PATCH 01/17] Update version to 1.6.0

---
 VERSION                   | 2 +-
 docs/source/conf.py       | 4 ++--
 src/f3dasm/__version__.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/VERSION b/VERSION
index 8e03717d..ce6a70b9 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.5.1
\ No newline at end of file
+1.6.0
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 888f8ada..c480a49b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -27,8 +27,8 @@
 project = 'f3dasm'
 author = 'Martin van der Schelling'
 copyright = '2024, Martin van der Schelling'
-version = '1.5.1'
-release = '1.5.1'
+version = '1.6.0'
+release = '1.6.0'

 # -- General configuration ----------------------------------------------------

diff --git a/src/f3dasm/__version__.py b/src/f3dasm/__version__.py
index 90fb960e..465e3feb 100644
--- a/src/f3dasm/__version__.py
+++ b/src/f3dasm/__version__.py
@@ -1 +1 @@
-__version__: str = "1.5.1"
+__version__: str = "1.6.0"

From d3fcef61b399e3374f4c464abbb519781635825d Mon Sep 17 00:00:00 2001
From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com>
Date: Thu, 20 Jun 2024 10:02:30 +0200
Subject: [PATCH 02/17] Comment out unused function in _Data for future dev

---
 src/f3dasm/_src/experimentdata/_data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py
index b75bf379..23cf153d 100644
--- a/src/f3dasm/_src/experimentdata/_data.py
+++ b/src/f3dasm/_src/experimentdata/_data.py
@@ -209,6 +209,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame) -> _Data:
         _columns = {name: None for name in dataframe.columns.to_list()}
         return cls(dataframe, columns=_Columns(_columns))

+    # NOT USED
     def reset(self, domain: Optional[Domain] = None):
         """Resets the data to the initial state.
From b6fbdcb0acf7792af719b20a50651fdf14a7f175 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:13:25 +0200 Subject: [PATCH 03/17] Fix formatting issues in parameter and experimentdata modules --- src/f3dasm/_src/design/parameter.py | 2 +- src/f3dasm/_src/experimentdata/experimentdata.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/f3dasm/_src/design/parameter.py b/src/f3dasm/_src/design/parameter.py index 7d21bc70..07b67e3f 100644 --- a/src/f3dasm/_src/design/parameter.py +++ b/src/f3dasm/_src/design/parameter.py @@ -279,7 +279,7 @@ def _check_range(self): raise ValueError("step size must be larger than 0!") -@ dataclass +@dataclass class _CategoricalParameter(_Parameter): """Create a search space parameter that is categorical diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index f8dd6705..55af5d16 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -240,7 +240,7 @@ def wrapper_func(self: ExperimentData, *args, **kwargs) -> None: # Properties # ========================================================================= - @ property + @property def index(self) -> pd.Index: """Returns an iterable of the job number of the experiments @@ -257,7 +257,7 @@ def index(self) -> pd.Index: # Alternative Constructors # ========================================================================= - @ classmethod + @classmethod def from_file(cls: Type[ExperimentData], project_dir: Path | str) -> ExperimentData: """Create an ExperimentData object from .csv and .json files. From 8918dca00243416caaf8ec932d2bc763aadf2199 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:38:54 +0200 Subject: [PATCH 04/17] Round input data to 6 decimal places and refactor combine_data_to_multiindex function --- src/f3dasm/_src/experimentdata/_data.py | 97 +------------------ .../_src/experimentdata/experimentdata.py | 32 +++++- tests/design/test_data.py | 15 --- tests/experimentdata/conftest.py | 5 +- tests/sampling/test_sampling.py | 2 +- 5 files changed, 35 insertions(+), 116 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 23cf153d..42a04bc6 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -209,27 +209,6 @@ def from_dataframe(cls, dataframe: pd.DataFrame) -> _Data: _columns = {name: None for name in dataframe.columns.to_list()} return cls(dataframe, columns=_Columns(_columns)) - # NOT USED - def reset(self, domain: Optional[Domain] = None): - """Resets the data to the initial state. - - Parameters - ---------- - domain : Domain, optional - The domain of the experiment. - - Note - ---- - If the domain is None, the data will be reset to an empty dataframe. - """ - - if domain is None: - self.data = pd.DataFrame() - self.columns = _Columns() - else: - self.data = self.from_domain(domain).data - self.columns = self.from_domain(domain).columns - # Export # ============================================================================= @@ -271,31 +250,6 @@ def to_dataframe(self) -> pd.DataFrame: df.columns = self.names return df.astype(object) - def combine_data_to_multiindex(self, other: _Data, - jobs_df: pd.DataFrame) -> pd.DataFrame: - """Combine the data to a multiindex dataframe. 
- - Parameters - ---------- - other : _Data - The other data to combine. - jobs : pd.DataFrame - The jobs dataframe. - - Returns - ------- - pd.DataFrame - The combined dataframe. - - Note - ---- - This function is mainly used to show the combined ExperimentData - object in a Jupyter Notebook - """ - return pd.concat([jobs_df, self.to_dataframe(), - other.to_dataframe()], - axis=1, keys=['jobs', 'input', 'output']) - def store(self, filename: Path) -> None: """Stores the data to a file. @@ -352,6 +306,7 @@ def select_columns(self, columns: Iterable[str] | str) -> _Data: return _Data( self.data[self.columns.iloc(columns)], columns=_selected_columns) + # TODO: Can we get rid of this method ? def drop(self, columns: Iterable[str] | str) -> _Data: """Drop the selected columns from the data. @@ -378,33 +333,6 @@ def drop(self, columns: Iterable[str] | str) -> _Data: # Append and remove data # ============================================================================= - def add(self, data: pd.DataFrame): - try: - last_index = self.data.index[-1] - except IndexError: # Empty dataframe - self.data = data - return - - new_indices = pd.RangeIndex( - start=last_index + 1, stop=last_index + len(data) + 1, step=1) - - # set the indices of the data to new_indices - data.index = new_indices - - self.data = pd.concat([self.data, data], ignore_index=False) - - def add_empty_rows(self, number_of_rows: int): - if self.data.index.empty: - last_index = -1 - else: - last_index = self.data.index[-1] - - new_indices = pd.RangeIndex( - start=last_index + 1, stop=last_index + number_of_rows + 1, step=1) - empty_data = pd.DataFrame( - np.nan, index=new_indices, columns=self.data.columns) - self.data = pd.concat([self.data, empty_data], ignore_index=False) - def add_column(self, name: str, exist_ok: bool = False): if name in self.columns.names: if not exist_ok: @@ -424,9 +352,6 @@ def add_column(self, name: str, exist_ok: bool = False): def remove(self, indices: List[int]): self.data = self.data.drop(indices) - def round(self, decimals: int): - self.data = self.data.round(decimals=decimals) - def overwrite(self, indices: Iterable[int], other: _Data | Dict[str, Any]): if isinstance(other, Dict): other = _convert_dict_to_data(other) @@ -437,6 +362,7 @@ def overwrite(self, indices: Iterable[int], other: _Data | Dict[str, Any]): self.data.update(other.data.set_index(pd.Index(indices))) + # TODO: Rename this method, it is not clear what it does def join(self, __o: _Data) -> _Data: """Join two Data objects together. @@ -456,6 +382,7 @@ def join(self, __o: _Data) -> _Data: # Getters and setters # ============================================================================= + # TODO: Rename this method ? It is not clear what it does def get_data_dict(self, index: int) -> Dict[str, Any]: return self.to_dataframe().loc[index].to_dict() @@ -518,24 +445,6 @@ def set_columnnames(self, names: Iterable[str]) -> None: for old_name, new_name in zip(self.names, names): self.columns.rename(old_name, new_name) - def cast_types(self, domain: Domain): - """Cast the types of the data to the types of the domain. - - Parameters - ---------- - domain : Domain - The domain with specific parameters to cast the types to. - - Raises - ------ - ValueError - If the types of the domain and the data do not match. 
- """ - _dtypes = {index: parameter._type - for index, (_, parameter) in enumerate( - domain.space.items())} - self.data = self.data.astype(_dtypes) - def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: """Converts a dictionary with scalar values to a data object. diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 55af5d16..b9327c39 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -186,12 +186,10 @@ def __eq__(self, __o: ExperimentData) -> bool: self.domain == __o.domain]) def _repr_html_(self) -> str: - return self._input_data.combine_data_to_multiindex( - self._output_data, self._jobs.to_dataframe())._repr_html_() + return combine_data_to_multiindex(self)._repr_html_() def __repr__(self) -> str: - return self._input_data.combine_data_to_multiindex( - self._output_data, self._jobs.to_dataframe()).__repr__() + return combine_data_to_multiindex(self).__repr__() def _access_file(operation: Callable) -> Callable: """Wrapper for accessing a single resource with a file lock @@ -1846,3 +1844,29 @@ def x0_factory(experiment_data: ExperimentData, x0._reset_index() return x0 + + +def combine_data_to_multiindex( + experiment_data: ExperimentData) -> pd.DataFrame: + """Combine the data to a multiindex dataframe. + + Parameters + ---------- + experiment_data: ExperimentData + The ExperimentData object to combine + + Returns + ------- + pd.DataFrame + The combined dataframe. + + Note + ---- + This function is mainly used to show the combined ExperimentData + object in a Jupyter Notebook + """ + return pd.concat( + [experiment_data._jobs.to_dataframe(), + experiment_data._input_data.to_dataframe(), + experiment_data._output_data.to_dataframe()], + axis=1, keys=['jobs', 'input', 'output']) diff --git a/tests/design/test_data.py b/tests/design/test_data.py index 0d546ccd..441750ab 100644 --- a/tests/design/test_data.py +++ b/tests/design/test_data.py @@ -34,27 +34,12 @@ def test_data_from_design(domain: Domain): assert isinstance(data.data, pd.DataFrame) -def test_data_reset(sample_data: _Data): - # Assuming you have a Domain object named "domain" - design = Domain() - sample_data.reset(design) - assert isinstance(sample_data.data, pd.DataFrame) - assert len(sample_data) == 0 - - def test_data_remove(sample_data: _Data): indices = [0, 2] sample_data.remove(indices) assert len(sample_data) == 1 -def test_data_add_numpy_arrays(sample_data: _Data): - input_array = np.array([[1, 4], [2, 5]]) - df = pd.DataFrame(input_array, columns=sample_data.names) - sample_data.add(df) - assert len(sample_data) == 5 - - def test_data_get_data(sample_data: _Data): input_data = sample_data.data assert isinstance(input_data, pd.DataFrame) diff --git a/tests/experimentdata/conftest.py b/tests/experimentdata/conftest.py index f2b70947..7c612ac0 100644 --- a/tests/experimentdata/conftest.py +++ b/tests/experimentdata/conftest.py @@ -65,7 +65,8 @@ def experimentdata_expected() -> ExperimentData: data.add(input_data=np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), output_data=np.array([[0.0], [0.0]]), domain=domain_continuous) - data._input_data.round(6) + # data._input_data.round(6) + data._input_data.data = data._input_data.data.round(6) # data._input_data.data = [[round(num, 6) if isinstance( # num, float) else num for num in sublist] # for sublist in data._input_data.data] @@ -81,7 +82,7 @@ def experimentdata_expected_no_output() -> ExperimentData: data.add(input_data=np.array( 
[[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), domain=domain_continuous)
-    data._input_data.round(6)
+    data._input_data.data = data._input_data.data.round(6)
     # data._input_data.data = [[round(num, 6) if isinstance(
     #     num, float) else num for num in sublist]
     #     for sublist in data._input_data.data]
diff --git a/tests/sampling/test_sampling.py b/tests/sampling/test_sampling.py
index b542840b..915b63b6 100644
--- a/tests/sampling/test_sampling.py
+++ b/tests/sampling/test_sampling.py
@@ -49,7 +49,7 @@ def test_correct_sampling_ran(design3: Domain):
     samples = ExperimentData(domain=design3)
     samples.sample(sampler='random', n_samples=numsamples, seed=seed)
-    samples._input_data.round(6)
+    samples._input_data.data = samples._input_data.data.round(6)
     df_input, _ = samples.to_pandas()
     df_input.columns = df_ground_truth.columns

From 82cf830934975f3020e79a0f844b3afd4cca7cd3 Mon Sep 17 00:00:00 2001
From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com>
Date: Thu, 20 Jun 2024 11:39:09 +0200
Subject: [PATCH 05/17] Update numpy version <2.0.0 in requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 4be98c2e..c560f525 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy
+numpy<2.0.0
 scipy
 pandas
 xarray

From 36af2ceeb95c0dcb30ab1515ab7cee6ce2567c79 Mon Sep 17 00:00:00 2001
From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com>
Date: Thu, 20 Jun 2024 11:56:46 +0200
Subject: [PATCH 06/17] Refactor column renaming methods in experimentdata
 module

---
 src/f3dasm/_src/experimentdata/_columns.py       |  9 ++++++++-
 src/f3dasm/_src/experimentdata/_data.py          |  7 -------
 src/f3dasm/_src/experimentdata/experimentdata.py | 11 ++++++-----
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/f3dasm/_src/experimentdata/_columns.py b/src/f3dasm/_src/experimentdata/_columns.py
index 76a3f474..5bb09cd0 100644
--- a/src/f3dasm/_src/experimentdata/_columns.py
+++ b/src/f3dasm/_src/experimentdata/_columns.py
@@ -18,7 +18,7 @@
 from __future__ import annotations

 # Standard
-from typing import Dict, List, Optional
+from typing import Dict, Iterable, List, Optional

 # Authorship & Credits
 # =============================================================================
@@ -123,3 +123,10 @@ def rename(self, old_name: str, new_name: str):
             name of the column to replace with
         """
         self.columns[new_name] = self.columns.pop(old_name)
+
+    def set_columnnames(self, names: Iterable[str]) -> None:
+        for old_name, new_name in zip(self.names, names):
+            self.rename(old_name, new_name)
+
+    def has_columnnames(self, names: Iterable[str]) -> bool:
+        return set(names).issubset(self.names)
diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py
index 42a04bc6..3817cda3 100644
--- a/src/f3dasm/_src/experimentdata/_data.py
+++ b/src/f3dasm/_src/experimentdata/_data.py
@@ -438,13 +438,6 @@ def get_index_with_nan(self) -> pd.Index:
         """
         return self.indices[self.data.isna().any(axis=1)]

-    def has_columnnames(self, names: Iterable[str]) -> bool:
-        return set(names).issubset(self.names)
-
-    def set_columnnames(self, names: Iterable[str]) -> None:
-        for old_name, new_name in zip(self.names, names):
-            self.columns.rename(old_name, new_name)
-

 def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data:
     """Converts a dictionary with scalar values to a data object.
diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index b9327c39..f053fdc5 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -133,16 +133,17 @@ def __init__(self, jobs, self._input_data, self._output_data, job_value) # Check if the columns of input_data are in the domain - if not self._input_data.has_columnnames(self.domain.names): - self._input_data.set_columnnames(self.domain.names) + if not self._input_data.columns.has_columnnames(self.domain.names): + self._input_data.columns.set_columnnames(self.domain.names) - if not self._output_data.has_columnnames(self.domain.output_names): - self._output_data.set_columnnames(self.domain.output_names) + if not self._output_data.columns.has_columnnames( + self.domain.output_names): + self._output_data.columns.set_columnnames(self.domain.output_names) # For backwards compatibility; if the output_data has # only one column, rename it to 'y' if self._output_data.names == [0]: - self._output_data.set_columnnames(['y']) + self._output_data.columns.set_columnnames(['y']) def __len__(self): """The len() method returns the number of datapoints""" From 25070b49bf92205787647a50e64801379c723cb4 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Fri, 21 Jun 2024 17:04:24 +0200 Subject: [PATCH 07/17] added _experimental newdata object --- .coveragerc | 1 + .../_experimental/_jobqueue2.py | 305 +++ .../experimentdata/_experimental/_newdata2.py | 221 ++ .../_experimental/_newexperimentdata2.py | 1881 +++++++++++++++++ src/f3dasm/_src/experimentdata/_jobqueue.py | 4 +- 5 files changed, 2411 insertions(+), 1 deletion(-) create mode 100644 src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py create mode 100644 src/f3dasm/_src/experimentdata/_experimental/_newdata2.py create mode 100644 src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py diff --git a/.coveragerc b/.coveragerc index 4cb78b98..54014b4b 100644 --- a/.coveragerc +++ b/.coveragerc @@ -5,6 +5,7 @@ source = src omit = tests/* **/__init__.py + src/f3dasm/_src/experimentdata/_experimental/* [report] # Regexes for lines to exclude from consideration diff --git a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py new file mode 100644 index 00000000..3c88308b --- /dev/null +++ b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py @@ -0,0 +1,305 @@ +# Modules +# ============================================================================= + +from __future__ import annotations + +# Standard +from copy import deepcopy +from enum import Enum +from pathlib import Path +from typing import Iterable, List, Type + +# Third-party +import pandas as pd + +# Local +from ._newdata2 import _Data + +# Authorship & Credits +# ============================================================================= +__author__ = 'Martin van der Schelling (M.P.vanderSchelling@tudelft.nl)' +__credits__ = ['Martin van der Schelling'] +__status__ = 'Stable' +# ============================================================================= +# +# ============================================================================= + + +class Status(str, Enum): + """Enum class for the status of a job.""" + OPEN = 'open' + IN_PROGRESS = 'in progress' + FINISHED = 'finished' + ERROR = 'error' + + def __str__(self) -> str: + return self.value + + +class 
NoOpenJobsError(Exception):
    """
    Exception raised when there are no open jobs.

    Attributes:
        message (str): The error message.
    """

    def __init__(self, message):
        super().__init__(message)

# =============================================================================


class Index:
    def __init__(self, jobs: pd.Series | None | str = None):
        if isinstance(jobs, str):
            self.jobs = pd.Series(jobs, index=[0], dtype='string')

        elif jobs is None:
            self.jobs = pd.Series(dtype='string')

        else:
            self.jobs = jobs

    def __len__(self) -> int:
        return len(self.jobs)

    def __add__(self, __o: Index | str) -> Index:
        if isinstance(__o, str):
            __o = Index(__o)

        if self.jobs.empty:
            return __o

        # Make a copy of other.jobs and give it a fresh, contiguous index
        # that continues after the last index of this object
        other_jobs_copy = deepcopy(__o)
        last_index = self.jobs.index[-1]
        other_jobs_copy.jobs.index = range(
            last_index + 1, last_index + 1 + len(other_jobs_copy))

        return Index(pd.concat([self.jobs, other_jobs_copy.jobs]))

    def __getitem__(self, indices: int | slice | Iterable[int]) -> Index:
        if isinstance(indices, int):
            indices = [indices]
        return Index(self.jobs[indices].copy())

    def __eq__(self, __o: Index) -> bool:
        return self.jobs.equals(__o.jobs)

    def _repr_html_(self) -> str:
        return self.jobs.__repr__()

    @property
    def indices(self) -> pd.Index:
        """The indices of the jobs."""
        return self.jobs.index

    def iloc(self, indices: Iterable[int]) -> Iterable[int]:
        return self.indices.get_indexer(indices)

    # Alternative Constructors
    # =========================================================================

    @classmethod
    def from_data(cls: Type[Index], data: _Data,
                  value: str = Status.OPEN) -> Index:
        """Create an Index object from a Data object.

        Parameters
        ----------
        data : Data
            Data object containing the data.
        value : str
            The value to assign to the jobs. Can be 'open',
            'in progress', 'finished', or 'error'.

        Returns
        -------
        Index
            Index object containing the loaded data.
        """
        return cls(pd.Series([value] * len(data), dtype='string'))

    @classmethod
    def from_file(cls: Type[Index], filename: Path | str) -> Index:
        """Create an Index object from a stored .csv or .pkl jobs file.

        Parameters
        ----------
        filename : Path | str
            Name of the file.

        Returns
        -------
        Index
            Index object containing the loaded data.
        """
        # Convert filename to Path
        if Path(filename).with_suffix('.csv').exists():
            return cls(
                pd.read_csv(Path(filename).with_suffix('.csv'),
                            index_col=0)['0'])

        elif Path(filename).with_suffix('.pkl').exists():
            return cls(
                pd.read_pickle(Path(filename).with_suffix('.pkl')))

        else:
            raise FileNotFoundError(f"Jobfile {filename} does not exist.")

    # Select
    # =========================================================================

    def select_all(self, status: str) -> Index:
        """Selects all jobs with a certain status.

        Parameters
        ----------
        status : str
            Status of the jobs to select

        Returns
        -------
        Index
            Index object containing the selected jobs.
        """
        return Index(self.jobs[self.jobs == status])

    # Export
    # =========================================================================

    def store(self, filename: Path) -> None:
        """Stores the jobs in a csv file.

        Parameters
        ----------
        filename : Path
            Path of the file.
        """
        self.jobs.to_csv(filename.with_suffix('.csv'))

    def to_dataframe(self, name: str = "") -> pd.DataFrame:
        """Converts the job queue to a DataFrame.
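
        A minimal sketch of the intended use (the job values and the
        column name are assumptions):

        >>> jobs = Index(pd.Series(['open', 'finished'], dtype='string'))
        >>> df = jobs.to_dataframe('status')  # one column named 'status'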

        Parameters
        ----------
        name : str, optional
            Name of the column, by default "".

        Note
        ----
        If the name is not specified, the column name will be an empty string.

        Returns
        -------
        DataFrame
            DataFrame containing the jobs.
        """
        return self.jobs.to_frame(name)

    # Append and remove jobs
    # =========================================================================

    def remove(self, indices: List[int]):
        """Removes a subset of the jobs.

        Parameters
        ----------
        indices : List[int]
            List of indices to remove.
        """
        self.jobs = self.jobs.drop(indices)

    def overwrite(
            self, indices: Iterable[int],
            other: Index | str) -> None:

        if isinstance(other, str):
            other = Index(
                pd.Series([other], index=[0], dtype='string'))

        self.jobs.update(other.jobs.set_axis(indices))

    # Mark
    # =========================================================================

    def mark(self, index: int | slice | Iterable[int],
             status: Status) -> None:
        """Marks a job with a certain status.

        Parameters
        ----------
        index : int | slice | Iterable[int]
            Index (or indices) of the job(s) to mark.
        status : Status
            Status to mark the job(s) with.
        """
        self.jobs.loc[index] = status

    def mark_all_in_progress_open(self) -> None:
        """Marks all jobs with status 'in progress' as 'open'."""
        self.jobs = self.jobs.replace(Status.IN_PROGRESS, Status.OPEN)

    def mark_all_error_open(self) -> None:
        """Marks all jobs with status 'error' as 'open'."""
        self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN)

    # Miscellaneous
    # =========================================================================

    def is_all_finished(self) -> bool:
        """Checks if all jobs are finished.

        Returns
        -------
        bool
            True if all jobs are either 'finished' or 'error',
            False otherwise.
        """
        return all(self.jobs.isin([Status.FINISHED, Status.ERROR]))

    def get_open_job(self) -> int:
        """Returns the index of the first open job.

        Returns
        -------
        int
            Index of an open job.
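
        Raises
        ------
        NoOpenJobsError
            If no job with status 'open' is present.

        Example
        -------
        An illustrative sketch (the job values are assumptions):

        >>> jobs = Index(pd.Series(['finished', 'open'], dtype='string'))
        >>> jobs.get_open_job()
        1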
+ """ + try: # try to find an open job + return int(self.jobs[self.jobs == Status.OPEN].index[0]) + except IndexError: + raise NoOpenJobsError("No open jobs found.") + + def reset_index(self) -> None: + """Resets the index of the jobs.""" + self.jobs.reset_index(drop=True, inplace=True) + + +def _jobs_factory(jobs: Path | str | Index | None, input_data: _Data, + output_data: _Data, job_value: Status) -> Index: + """Creates a Index object from particular inpute + + Parameters + ---------- + jobs : Path | str | None + input data for the jobs + input_data : _Data + _Data object of input data to extract indices from, if necessary + output_data : _Data + _Data object of output data to extract indices from, if necessary + job_value : Status + initial value of all the jobs + + Returns + ------- + Index + JobQueue object + """ + if isinstance(jobs, Index): + return jobs + + if isinstance(jobs, (Path, str)): + return Index.from_file(Path(jobs)) + + if input_data.is_empty(): + return Index.from_data(output_data, value=job_value) + + return Index.from_data(input_data, value=job_value) diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py new file mode 100644 index 00000000..759473e6 --- /dev/null +++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py @@ -0,0 +1,221 @@ +# Modules +# ============================================================================= + +from __future__ import annotations + +# Standard +from copy import deepcopy +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Type, Union + +# Third-party +import numpy as np +import pandas as pd +import xarray as xr + +# Authorship & Credits +# ============================================================================= +__author__ = 'Martin van der Schelling (M.P.vanderSchelling@tudelft.nl)' +__credits__ = ['Martin van der Schelling'] +__status__ = 'Stable' +# ============================================================================= +# +# ============================================================================= + +MISSING_VALUE = np.nan + + +class _Data: + def __init__(self, data: Dict[int, Dict[str, Any]] = None): + self.data = data if data is not None else {} + + def __len__(self) -> int: + return len(self.data) + + def __iter__(self): + return iter(self.data.values()) + + def __getitem__(self, rows: int | slice | Iterable[int]) -> _Data: + + if isinstance(rows, int): + rows = [rows] + + return _Data({row: self.data.get(row, {}) for row in rows}) + + def __add__(self, __o: _Data) -> _Data: + if self.is_empty(): + return __o + + _data_copy = deepcopy(self) + other_data_copy = deepcopy(__o) + + new_indices = (np.array(range(len(__o))) + max(self.data) + 1).tolist() + + _data_copy.data.update({row: values for row, values in zip( + new_indices, other_data_copy.data.values())}) + return _data_copy + + def __eq__(self, __o: _Data) -> bool: + return self.data == __o.data + + def _repr_html_(self) -> str: + return self.to_dataframe()._repr_html_() + + def __repr__(self) -> str: + return self.to_dataframe().__repr__() + + @property + def indices(self) -> List[int]: + return list(self.data.keys()) + + @property + def names(self) -> List[str]: + return self.to_dataframe().columns.tolist() + + @classmethod + def from_indices(cls, rows: Iterable[int]): + return cls({row: {} for row in rows}) + + # @classmethod + # def from_domain(cls, space: Iterable[str]): + # return cls(None) + + @classmethod + def from_file(cls, filename: 
        ...

    @classmethod
    def from_numpy(cls: Type[_Data], array: np.ndarray,
                   keys: Optional[Iterable[str]] = None) -> _Data:
        if keys is not None:
            return _Data(
                {index: {key: col for key, col in zip(keys, row)}
                 for index, row in enumerate(array)})
        else:
            # Look out! i is now an integer key!
            return _Data(
                {index: {i: col for i, col in enumerate(row)}
                 for index, row in enumerate(array)})

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame) -> _Data:
        return _Data(
            {index: row.to_dict() for index, (_, row) in
             enumerate(df.iterrows())})

    def to_numpy(self) -> np.ndarray:
        return self.to_dataframe().to_numpy()

    def to_xarray(self, label: str) -> xr.DataArray:
        df = self.to_dataframe()
        # Can create the xarray with the information from the domain!
        return xr.DataArray(
            df, dims=['iterations', label], coords={
                'iterations': df.index, label: df.columns})

    def to_dataframe(self) -> pd.DataFrame:
        # Can create the dataframe from the numpy array + column names!
        return pd.DataFrame(self.data).T

    def store(self, filename: Path):
        ...

    def n_best_samples(self, nosamples: int, key: str) -> pd.DataFrame:
        df = self.to_dataframe()
        return df.nsmallest(n=nosamples, columns=key)

    def select_columns(self, keys: Iterable[str] | str) -> _Data:
        # `keys` is either a single column name or an iterable of names
        if isinstance(keys, str):
            keys = [keys]

        return _Data(
            {index: {key: row.get(key, MISSING_VALUE) for key in keys}
             for index, row in self.data.items()})

    def drop(self, keys: Iterable[str] | str) -> _Data:
        # Might be deprecated?
        if isinstance(keys, str):
            keys = [keys]

        # Work on a copy so that a new _Data object can be returned,
        # as the callers in ExperimentData expect
        _data = deepcopy(self)
        for row in _data.data:
            for key in keys:
                if key in _data.data[row]:
                    del _data.data[row][key]

        return _data

    def add_column(self, key: str, exist_ok: bool = False):
        # `exist_ok` is accepted for compatibility with the caller in
        # ExperimentData; existing values are left untouched
        for row in self.data:
            self.data[row].setdefault(key, MISSING_VALUE)

    def remove(self, rows: Iterable[int]):
        for row in rows:
            del self.data[row]  # = deleting the row

    def overwrite(self, rows: Iterable[int], __o: _Data):
        for index, other_row in zip(rows, __o):
            self.data[index] = other_row

    def join(self, __o: _Data) -> _Data:
        _data = deepcopy(self)
        for row, other_row in zip(_data, __o):
            row.update(other_row)

        return _data

    def get_data_dict(self, row: int) -> Dict[str, Any]:
        return self.data[row]

    def set_data(self, row: int, value: Any, key: str):
        self.data[row][key] = value

    def get_index_with_nan(self) -> List[int]:
        # Needed by ExperimentData.mark_all_nan_open(); assumes the stored
        # values are scalars
        return [row for row, values in self.data.items()
                if any(pd.isna(value) for value in values.values())]

    def reset_index(self, rows: Iterable[int] = None):
        # Note: `rows` is accepted for API compatibility, but the index is
        # always reset to a contiguous 0-based range
        self.data = {index: values
                     for index, values in enumerate(self.data.values())}

    def is_empty(self) -> bool:
        return not bool(self.data)


def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data:
    """Converts a dictionary with scalar values to a data object.

    Parameters
    ----------
    dictionary : Dict[str, Any]
        The dictionary to convert. Note that the dictionary
        should only have scalar values!

    Returns
    -------
    _Data
        The data object.
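
    Example
    -------
    A minimal sketch (the column names are assumptions):

    >>> d = _convert_dict_to_data({'x0': 0.5, 'x1': 'a'})
    >>> d.get_data_dict(0)
    {'x0': 0.5, 'x1': 'a'}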
+ """ + return _Data({0: {dictionary}}) + + +def _data_factory(data: DataTypes) -> _Data: + if data is None: + return _Data() + + elif isinstance(data, _Data): + return data + + elif isinstance(data, pd.DataFrame): + return _Data.from_dataframe(data) + + elif isinstance(data, (Path, str)): + return _Data.from_file(Path(data)) + + elif isinstance(data, np.ndarray): + return _Data.from_numpy(data) + + else: + raise TypeError( + f"Data must be of type _Data, pd.DataFrame, np.ndarray, " + f"Path or str, not {type(data)}") + + +DataTypes = Union[pd.DataFrame, np.ndarray, Path, str, _Data] diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py new file mode 100644 index 00000000..cb776297 --- /dev/null +++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py @@ -0,0 +1,1881 @@ +""" +The ExperimentData object is the main object used to store implementations + of a design-of-experiments, keep track of results, perform optimization and + extract data for machine learning purposes. +""" + +# Modules +# ============================================================================= + +from __future__ import annotations + +# Standard +import inspect +import traceback +from copy import copy +from functools import wraps +from pathlib import Path +from time import sleep +from typing import (Any, Callable, Dict, Iterable, Iterator, List, Literal, + Optional, Tuple, Type) + +# Third-party +import numpy as np +import pandas as pd +import xarray as xr +from filelock import FileLock +from hydra.utils import get_original_cwd +from omegaconf import DictConfig +from pathos.helpers import mp + +# Local +from ...datageneration.datagenerator import DataGenerator, convert_function +from ...datageneration.functions.function_factory import _datagenerator_factory +from ...design.domain import Domain, _domain_factory +from ...logger import logger +from ...optimization import Optimizer +from ...optimization.optimizer_factory import _optimizer_factory +from .._io import (DOMAIN_FILENAME, EXPERIMENTDATA_SUBFOLDER, + INPUT_DATA_FILENAME, JOBS_FILENAME, LOCK_FILENAME, + MAX_TRIES, OUTPUT_DATA_FILENAME, _project_dir_factory) +from ..experimentsample import ExperimentSample +from ..samplers import Sampler, SamplerNames, _sampler_factory +from ..utils import number_of_overiterations, number_of_updates +from ._jobqueue2 import NoOpenJobsError, Status, _jobs_factory +from ._newdata2 import DataTypes, _Data, _data_factory + +# Authorship & Credits +# ============================================================================= +__author__ = 'Martin van der Schelling (M.P.vanderSchelling@tudelft.nl)' +__credits__ = ['Martin van der Schelling'] +__status__ = 'Stable' +# ============================================================================= +# +# ============================================================================= + + +class ExperimentData: + """ + A class that contains data for experiments. + """ + + def __init__(self, + domain: Optional[Domain] = None, + input_data: Optional[DataTypes] = None, + output_data: Optional[DataTypes] = None, + jobs: Optional[Path | str] = None, + project_dir: Optional[Path] = None): + """ + Initializes an instance of ExperimentData. 
+ + Parameters + ---------- + domain : Domain, optional + The domain of the experiment, by default None + input_data : DataTypes, optional + The input data of the experiment, by default None + output_data : DataTypes, optional + The output data of the experiment, by default None + jobs : Path | str, optional + The path to the jobs file, by default None + project_dir : Path | str, optional + A user-defined directory where the f3dasm project folder will be \ + created, by default the current working directory. + + Note + ---- + + The following data formats are supported for input and output data: + + * numpy array + * pandas Dataframe + * path to a csv file + + If no domain object is provided, the domain is inferred from the \ + input_data. + + If the provided project_dir does not exist, it will be created. + + Raises + ------ + + ValueError + If the input_data is a numpy array, the domain has to be provided. + """ + + if isinstance(input_data, np.ndarray) and domain is None: + raise ValueError( + 'If you provide a numpy array as input_data, \ + you have to provide the domain!') + + self.project_dir = _project_dir_factory(project_dir) + + self._input_data = _data_factory(input_data) + self._output_data = _data_factory(output_data) + + # Create empty output_data from indices if output_data is empty + if self._output_data.is_empty(): + self._output_data = _Data.from_indices(self._input_data.indices) + job_value = Status.OPEN + + else: + job_value = Status.FINISHED + + self.domain = _domain_factory( + domain=domain, input_data=self._input_data.to_dataframe(), + output_data=self._output_data.to_dataframe()) + + # Create empty input_data from domain if input_data is empty + if self._input_data.is_empty(): + self._input_data = _Data() + + self._jobs = _jobs_factory( + jobs, self._input_data, self._output_data, job_value) + + # # Check if the columns of input_data are in the domain + # if not self._input_data.columns.has_columnnames(self.domain.names): + # self._input_data.columns.set_columnnames(self.domain.names) + + # if not self._output_data.columns.has_columnnames( + # self.domain.output_names): + # self._output_data.columns.set_columnnames(self.domain.output_names) + + # For backwards compatibility; if the output_data has + # only one column, rename it to 'y' + # TODO: Fix this for newdata2 + if self._output_data.names == [0]: + self._output_data.columns.set_columnnames(['y']) + + def __len__(self): + """The len() method returns the number of datapoints""" + return len(self._jobs) + + # if self._input_data.is_empty(): + # return len(self._output_data) + + # return len(self._input_data) + + def __iter__(self) -> Iterator[Tuple[Dict[str, Any]]]: + self.current_index = 0 + return self + + def __next__(self) -> ExperimentSample: + if self.current_index >= len(self): + raise StopIteration + else: + index = self.index[self.current_index] + self.current_index += 1 + return self.get_experiment_sample(index) + + def __add__(self, + __o: ExperimentData | ExperimentSample) -> ExperimentData: + """The + operator combines two ExperimentData objects""" + # Check if the domains are the same + + if not isinstance(__o, (ExperimentData, ExperimentSample)): + raise TypeError( + f"Can only add ExperimentData or " + f"ExperimentSample objects, not {type(__o)}") + + return ExperimentData( + input_data=self._input_data + __o._input_data, + output_data=self._output_data + __o._output_data, + jobs=self._jobs + __o._jobs, domain=self.domain + __o.domain, + project_dir=self.project_dir) + + def __eq__(self, __o: 
ExperimentData) -> bool: + return all([self._input_data == __o._input_data, + self._output_data == __o._output_data, + self._jobs == __o._jobs, + self.domain == __o.domain]) + + def _repr_html_(self) -> str: + return combine_data_to_multiindex(self)._repr_html_() + + def __repr__(self) -> str: + return combine_data_to_multiindex(self).__repr__() + + def _access_file(operation: Callable) -> Callable: + """Wrapper for accessing a single resource with a file lock + + Parameters + ---------- + operation : Callable + The operation to be performed on the resource + + Returns + ------- + Callable + The wrapped operation + """ + @wraps(operation) + def wrapper_func(self: ExperimentData, *args, **kwargs) -> None: + lock = FileLock( + (self. + project_dir / EXPERIMENTDATA_SUBFOLDER / LOCK_FILENAME) + .with_suffix('.lock')) + + # If the lock has been acquired: + with lock: + tries = 0 + while tries < MAX_TRIES: + try: + self = ExperimentData.from_file(self.project_dir) + value = operation(self, *args, **kwargs) + self.store() + break + + # Racing conditions can occur when the file is empty + # and the file is being read at the same time + except pd.errors.EmptyDataError: + tries += 1 + logger.debug(( + f"EmptyDataError occurred, retrying" + f" {tries+1}/{MAX_TRIES}")) + sleep(1) + + raise pd.errors.EmptyDataError() + + return value + + return wrapper_func + # Properties + # ========================================================================= + + @property + def index(self) -> pd.Index: + """Returns an iterable of the job number of the experiments + + Returns + ------- + pd.Index + The job number of all the experiments in pandas Index format + """ + return self._jobs.indices + + # if self._input_data.is_empty(): + # return self._output_data.indices + + # return self._input_data.indices + + # Alternative Constructors + # ========================================================================= + + @classmethod + def from_file(cls: Type[ExperimentData], + project_dir: Path | str) -> ExperimentData: + """Create an ExperimentData object from .csv and .json files. + + Parameters + ---------- + project_dir : Path | str + User defined path of the experimentdata directory. + + Returns + ------- + ExperimentData + ExperimentData object containing the loaded data. + """ + if isinstance(project_dir, str): + project_dir = Path(project_dir) + + try: + return cls._from_file_attempt(project_dir) + except FileNotFoundError: + try: + filename_with_path = Path(get_original_cwd()) / project_dir + except ValueError: # get_original_cwd() hydra initialization error + raise FileNotFoundError( + f"Cannot find the folder {project_dir} !") + + return cls._from_file_attempt(filename_with_path) + + @classmethod + def from_sampling(cls, sampler: Sampler | str, domain: Domain | DictConfig, + n_samples: int = 1, + seed: Optional[int] = None, + **kwargs) -> ExperimentData: + """Create an ExperimentData object from a sampler. + + Parameters + ---------- + sampler : Sampler | str + Sampler object containing the sampling strategy or one of the + built-in sampler names. + domain : Domain | DictConfig + Domain object containing the domain of the experiment or hydra + DictConfig object containing the configuration. + n_samples : int, optional + Number of samples, by default 1. + seed : int, optional + Seed for the random number generator, by default None. + + Returns + ------- + ExperimentData + ExperimentData object containing the sampled data. 

        Note
        ----

        If a string is passed for the sampler argument, it should be one
        of the built-in samplers:

        * 'random' : Random sampling
        * 'latin' : Latin Hypercube Sampling
        * 'sobol' : Sobol Sequence Sampling
        * 'grid' : Grid Search Sampling

        Any additional keyword arguments are passed to the sampler.
        """
        experimentdata = cls(domain=domain)
        experimentdata.sample(
            sampler=sampler, n_samples=n_samples, seed=seed, **kwargs)
        return experimentdata

    @classmethod
    def from_yaml(cls, config: DictConfig) -> ExperimentData:
        """Create an ExperimentData object from a hydra yaml configuration.

        Parameters
        ----------
        config : DictConfig
            A DictConfig object containing the configuration of the \
            experiment data.

        Returns
        -------
        ExperimentData
            ExperimentData object containing the loaded data.
        """
        # Option 0: Both existing and sampling
        if 'from_file' in config and 'from_sampling' in config:
            return cls.from_file(config.from_file) + cls.from_sampling(
                **config.from_sampling)

        # Option 1: From existing ExperimentData files
        if 'from_file' in config:
            return cls.from_file(config.from_file)

        # Option 2: Sample from the domain
        if 'from_sampling' in config:
            return cls.from_sampling(**config.from_sampling)

        else:
            return cls(**config)

    @classmethod
    def _from_file_attempt(cls: Type[ExperimentData],
                           project_dir: Path) -> ExperimentData:
        """Attempt to create an ExperimentData object
        from .csv and .pkl files.

        Parameters
        ----------
        project_dir : Path
            Name of the user-defined directory where the files are stored.

        Returns
        -------
        ExperimentData
            ExperimentData object containing the loaded data.

        Raises
        ------
        FileNotFoundError
            If the files cannot be found.
        """
        subdirectory = project_dir / EXPERIMENTDATA_SUBFOLDER

        try:
            return cls(domain=subdirectory / DOMAIN_FILENAME,
                       input_data=subdirectory / INPUT_DATA_FILENAME,
                       output_data=subdirectory / OUTPUT_DATA_FILENAME,
                       jobs=subdirectory / JOBS_FILENAME,
                       project_dir=project_dir)
        except FileNotFoundError:
            raise FileNotFoundError(
                f"Cannot find the files from {subdirectory}.")

    # Selecting subsets
    # =========================================================================

    def select(self, job_ids: int | Iterable[int]) -> ExperimentData:
        """Select a subset of the ExperimentData object

        Parameters
        ----------
        job_ids : int | Iterable[int]
            The job number(s) to select.

        Returns
        -------
        ExperimentData
            The selected ExperimentData object with only the selected indices.
        """
        indices = self._jobs.iloc(job_ids)
        # TODO: It could be that the indices are not in the input_data
        # and output_data, because they are not defined

        return ExperimentData(input_data=self._input_data[indices],
                              output_data=self._output_data[indices],
                              jobs=self._jobs[job_ids],
                              domain=self.domain,
                              project_dir=self.project_dir)

    def drop_output(self, names: Iterable[str] | str) -> ExperimentData:
        """Drop one or more columns from the output data

        Parameters
        ----------
        names : Iterable | str
            The names of the columns to drop.

        Returns
        -------
        ExperimentData
            The ExperimentData object with the column dropped.
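
        Example
        -------
        Dropping a single output column (an illustrative sketch; the
        column name 'y' is an assumption):

        >>> reduced = data.drop_output('y')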
+ """ + return ExperimentData(input_data=self._input_data, + output_data=self._output_data.drop(names), + jobs=self._jobs, domain=self.domain.drop_output( + names), + project_dir=self.project_dir) + + def select_with_status(self, status: Literal['open', 'in progress', + 'finished', 'error'] + ) -> ExperimentData: + """Select a subset of the ExperimentData object with a given status + + Parameters + ---------- + status : Literal['open', 'in progress', 'finished', 'error'] + The status to select. + + Returns + ------- + ExperimentData + The selected ExperimentData object with only the selected status. + + Raises + ------ + ValueError + Raised when invalid status is specified + """ + if status not in [s.value for s in Status]: + raise ValueError(f"Invalid status {status} given. " + f"\nChoose from values: " + f"{', '.join([s.value for s in Status])}") + + _indices = self._jobs.select_all(status).indices + return self.select(_indices) + + def get_input_data(self, + parameter_names: Optional[str | Iterable[str]] = None + ) -> ExperimentData: + """Retrieve a subset of the input data from the ExperimentData object + + Parameters + ---------- + parameter_names : str | Iterable[str], optional + The name(s) of the input parameters that you want to retrieve, \ + if None all input parameters are retrieved, by default None + + Returns + ------- + ExperimentData + The selected ExperimentData object with only the\ + selected input data. + + Note + ---- + If parameter_names is None, all input data is retrieved. \ + The returned ExperimentData object has the domain of \ + the original ExperimentData object, \ + but only with the selected input parameters.\ + """ + if parameter_names is None: + return ExperimentData(input_data=self._input_data, + jobs=self._jobs, + domain=self.domain, + project_dir=self.project_dir) + else: + return ExperimentData(input_data=self._input_data.select_columns( + parameter_names), + jobs=self._jobs, + domain=self.domain.select(parameter_names), + project_dir=self.project_dir) + + def get_output_data(self, + parameter_names: Optional[str | Iterable[str]] = None + ) -> ExperimentData: + """Retrieve a subset of the output data from the ExperimentData object + + Parameters + ---------- + parameter_names : str | Iterable[str], optional + The name(s) of the output parameters that you want to retrieve, \ + if None all output parameters are retrieved, by default None + + Returns + ------- + ExperimentData + The selected ExperimentData object with only \ + the selected output data. + + Note + ---- + If parameter_names is None, all output data is retrieved. \ + The returned ExperimentData object has no domain object and \ + no input data! + """ + if parameter_names is None: + # TODO: Make a domain where space is empty + # but it tracks output_space! + return ExperimentData( + output_data=self._output_data, jobs=self._jobs, + project_dir=self.project_dir) + else: + return ExperimentData( + output_data=self._output_data.select_columns(parameter_names), + jobs=self._jobs, + project_dir=self.project_dir) + + # Export + # ========================================================================= + + def store(self, project_dir: Optional[Path | str] = None): + """Write the ExperimentData to disk in the project directory. + + Parameters + ---------- + project_dir : Optional[Path | str], optional + The f3dasm project directory to store the \ + ExperimentData object to, by default None. 
+ + Note + ---- + If no project directory is provided, the ExperimentData object is \ + stored in the directory provided by the `.project_dir` attribute that \ + is set upon creation of the object. + + The ExperimentData object is stored in a subfolder 'experiment_data'. + + The ExperimentData object is stored in four files: + + * the input data (`input.csv`) + * the output data (`output.csv`) + * the jobs (`jobs.pkl`) + * the domain (`domain.pkl`) + + To avoid the ExperimentData to be written simultaneously by multiple \ + processes, a '.lock' file is automatically created \ + in the project directory. Concurrent process can only sequentially \ + access the lock file. This lock file is removed after the \ + ExperimentData object is written to disk. + """ + if project_dir is not None: + self.set_project_dir(project_dir) + + subdirectory = self.project_dir / EXPERIMENTDATA_SUBFOLDER + + # Create the subdirectory if it does not exist + subdirectory.mkdir(parents=True, exist_ok=True) + + self._input_data.store(subdirectory / Path(INPUT_DATA_FILENAME)) + self._output_data.store(subdirectory / Path(OUTPUT_DATA_FILENAME)) + self._jobs.store(subdirectory / Path(JOBS_FILENAME)) + self.domain.store(subdirectory / Path(DOMAIN_FILENAME)) + + def to_numpy(self) -> Tuple[np.ndarray, np.ndarray]: + """ + Convert the ExperimentData object to a tuple of numpy arrays. + + Returns + ------- + tuple + A tuple containing two numpy arrays, \ + the first one for input columns, \ + and the second for output columns. + """ + return self._input_data.to_numpy(), self._output_data.to_numpy() + + def to_pandas(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + """ + Convert the ExperimentData object to a pandas DataFrame. + + Returns + ------- + tuple + A tuple containing two pandas DataFrames, \ + the first one for input columns, and the second for output + """ + return (self._input_data.to_dataframe(), + self._output_data.to_dataframe()) + + def to_xarray(self) -> xr.Dataset: + """ + Convert the ExperimentData object to an xarray Dataset. + + Returns + ------- + xarray.Dataset + An xarray Dataset containing the data. + """ + return xr.Dataset( + {'input': self._input_data.to_xarray('input_dim'), + 'output': self._output_data.to_xarray('output_dim')}) + + def get_n_best_output(self, n_samples: int) -> ExperimentData: + """Get the n best samples from the output data. \ + We consider lower values to be better. + + Parameters + ---------- + n_samples : int + Number of samples to select. + + Returns + ------- + ExperimentData + New experimentData object with a selection of the n best samples. + + Note + ---- + + The n best samples are selected based on the output data. \ + The output data is sorted based on the first output parameter. \ + The n best samples are selected based on this sorting. \ + """ + df = self._output_data.n_best_samples( + n_samples, self._output_data.names) + return self.select(df.index) + + # Append or remove data + # ========================================================================= + + def add(self, domain: Optional[Domain] = None, + input_data: Optional[DataTypes] = None, + output_data: Optional[DataTypes] = None, + jobs: Optional[Path | str] = None) -> None: + """Add data to the ExperimentData object. 
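
        For example, two new input points could be appended like this
        (an illustrative sketch; the array values are assumptions):

        >>> data.add(input_data=np.array([[0.1, 0.2], [0.3, 0.4]]),
        ...          domain=data.domain)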

        Parameters
        ----------
        domain : Optional[Domain], optional
            Domain of the added object, by default None
        input_data : Optional[DataTypes], optional
            input parameters of the added object, by default None
        output_data : Optional[DataTypes], optional
            output parameters of the added object, by default None
        jobs : Optional[Path | str], optional
            jobs of the added object, by default None
        """
        self.add_experiments(ExperimentData(
            domain=domain, input_data=input_data,
            output_data=output_data,
            jobs=jobs))

    def add_experiments(self,
                        experiment_sample: ExperimentSample | ExperimentData
                        ) -> None:
        """
        Add an ExperimentSample or ExperimentData to the ExperimentData
        attribute.

        Parameters
        ----------
        experiment_sample : ExperimentSample or ExperimentData
            Experiment(s) to add.

        Raises
        ------
        ValueError
            If the indices of the input data, output data and jobs are
            not equal after adding.
        """

        if isinstance(experiment_sample, ExperimentData):
            experiment_sample._reset_index()
            self.domain += experiment_sample.domain

        self._input_data += experiment_sample._input_data
        self._output_data += experiment_sample._output_data
        self._jobs += experiment_sample._jobs

        # Check if indices of the internal objects are equal; the
        # experimental _Data object stores its indices as plain lists
        if not (list(self._input_data.indices)
                == list(self._output_data.indices)
                == list(self._jobs.indices)):
            raise ValueError(f"Indices of the internal objects are not equal."
                             f"input_data {self._input_data.indices}, "
                             f"output_data {self._output_data.indices},"
                             f"jobs: {self._jobs.indices}")

        # Apparently you need to cast the types again
        # TODO: Breaks if values are NaN or infinite
        # self._input_data.cast_types(self.domain)

    def overwrite(
            self, indices: Iterable[int],
            domain: Optional[Domain] = None,
            input_data: Optional[DataTypes] = None,
            output_data: Optional[DataTypes] = None,
            jobs: Optional[Path | str] = None,
            add_if_not_exist: bool = False
    ) -> None:
        """Overwrite the ExperimentData object.

        Parameters
        ----------
        indices : Iterable[int]
            The indices to overwrite.
        domain : Optional[Domain], optional
            Domain of the new object, by default None
        input_data : Optional[DataTypes], optional
            input parameters of the new object, by default None
        output_data : Optional[DataTypes], optional
            output parameters of the new object, by default None
        jobs : Optional[Path | str], optional
            jobs of the new object, by default None
        add_if_not_exist : bool, optional
            If True, the new objects are added if the requested indices
            do not exist in the current ExperimentData object,
            by default False
        """

        # Be careful: if a job has output data and gets overwritten with a
        # job that has no output data, the status is set to open. But the job
        # will still have the output data!

        # This is usually not a problem, because the output data will be
        # immediately overwritten in optimization.

        self._overwrite_experiments(
            indices=indices,
            experiment_sample=ExperimentData(
                domain=domain, input_data=input_data,
                output_data=output_data,
                jobs=jobs),
            add_if_not_exist=add_if_not_exist)

    def _overwrite_experiments(
            self, indices: Iterable[int],
            experiment_sample: ExperimentSample | ExperimentData,
            add_if_not_exist: bool) -> None:
        """
        Overwrite the ExperimentData object at the given indices.

        Parameters
        ----------
        indices : Iterable[int]
            The indices to overwrite.
+ experimentdata : ExperimentData | ExperimentSample + The new ExperimentData object to overwrite with. + add_if_not_exist : bool + If True, the new objects are added if the requested indices + do not exist in the current ExperimentData object. + """ + if not all(pd.Index(indices).isin(self.index)): + if add_if_not_exist: + self.add_experiments(experiment_sample) + return + else: + raise ValueError( + f"The given indices {indices} do not exist in the current " + f"ExperimentData object. " + f"If you want to add the new experiments, " + f"set add_if_not_exist to True.") + + self._input_data.overwrite( + rows=indices, other=experiment_sample._input_data) + self._output_data.overwrite( + rows=indices, other=experiment_sample._output_data) + + self._jobs.overwrite( + indices=indices, other=experiment_sample._jobs) + + if isinstance(experiment_sample, ExperimentData): + self.domain += experiment_sample.domain + + @_access_file + def overwrite_disk( + self, indices: Iterable[int], + domain: Optional[Domain] = None, + input_data: Optional[DataTypes] = None, + output_data: Optional[DataTypes] = None, + jobs: Optional[Path | str] = None, + add_if_not_exist: bool = False + ) -> None: + self.overwrite(indices=indices, domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs, + add_if_not_exist=add_if_not_exist) + + def add_input_parameter( + self, name: str, + type: Literal['float', 'int', 'category', 'constant'], + **kwargs): + """Add a new input column to the ExperimentData object. + + Parameters + ---------- + name + name of the new input column + type + type of the new input column: float, int, category or constant + kwargs + additional arguments for the new parameter + """ + self._input_data.add_column(name) + self.domain.add(name=name, type=type, **kwargs) + + def add_output_parameter( + self, name: str, is_disk: bool, exist_ok: bool = False) -> None: + """Add a new output column to the ExperimentData object. + + Parameters + ---------- + name + name of the new output column + is_disk + Whether the output column will be stored on disk or not + exist_ok + If True, it will not raise an error if the output column already + exists, by default False + """ + self._output_data.add_column(name, exist_ok=exist_ok) + self.domain.add_output(name=name, to_disk=is_disk, exist_ok=exist_ok) + + def remove_rows_bottom(self, number_of_rows: int): + """ + Remove a number of rows from the end of the ExperimentData object. + + Parameters + ---------- + number_of_rows : int + Number of rows to remove from the bottom. + """ + if number_of_rows == 0: + return # Don't do anything if 0 rows need to be removed + + # get the last indices from data.data + indices = self.index[-number_of_rows:] + + # remove the indices rows_to_remove from data.data + self._input_data.remove(indices) + self._output_data.remove(indices) + self._jobs.remove(indices) + + def _reset_index(self) -> None: + """ + Reset the index of the ExperimentData object. + """ + self._input_data.reset_index() + + if self._input_data.is_empty(): + self._output_data.reset_index() + else: + self._output_data.reset_index(self._input_data.indices) + self._jobs.reset_index() + + def join(self, other: ExperimentData) -> ExperimentData: + """Join two ExperimentData objects. + + Parameters + ---------- + other : ExperimentData + The other ExperimentData object to join with. + + Returns + ------- + ExperimentData + The joined ExperimentData object. 
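
        Example
        -------
        An illustrative sketch, assuming `data_a` holds only input
        columns and `data_b` holds only output columns of the same rows:

        >>> joined = data_a.join(data_b)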
+        """
+        return ExperimentData(
+            input_data=self._input_data.join(other._input_data),
+            output_data=self._output_data.join(other._output_data),
+            jobs=self._jobs,
+            domain=self.domain + other.domain,
+            project_dir=self.project_dir)
+
+    # ExperimentSample
+    # =========================================================================
+
+    def get_experiment_sample(self, index: int) -> ExperimentSample:
+        """
+        Gets the experiment_sample at the given index.
+
+        Parameters
+        ----------
+        index : int
+            The index of the experiment_sample to retrieve.
+
+        Returns
+        -------
+        ExperimentSample
+            The ExperimentSample at the given index.
+        """
+        output_experiment_sample_dict = self._output_data.get_data_dict(index)
+
+        dict_output = {k: (v, self.domain.output_space[k].to_disk)
+                       for k, v in output_experiment_sample_dict.items()}
+
+        return ExperimentSample(
+            dict_input=self._input_data.get_data_dict(index),
+            dict_output=dict_output,
+            jobnumber=index,
+            experimentdata_directory=self.project_dir)
+
+    def get_experiment_samples(
+            self,
+            indices: Optional[Iterable[int]] = None) -> List[ExperimentSample]:
+        """
+        Gets the experiment_samples at the given indices.
+
+        Parameters
+        ----------
+        indices : Optional[Iterable[int]], optional
+            The indices of the experiment_samples to retrieve, by default None
+            If None, all experiment_samples are retrieved.
+
+        Returns
+        -------
+        List[ExperimentSample]
+            The ExperimentSamples at the given indices.
+        """
+        if indices is None:
+            # Return a list of the iterator over ExperimentData
+            return list(self)
+
+        return [self.get_experiment_sample(index) for index in indices]
+
+    def _set_experiment_sample(self,
+                               experiment_sample: ExperimentSample) -> None:
+        """
+        Sets the ExperimentSample at the given index.
+
+        Parameters
+        ----------
+        experiment_sample : ExperimentSample
+            The ExperimentSample to set.
+        """
+        for column, (value, is_disk) in experiment_sample._dict_output.items():
+
+            if not self.domain.is_in_output(column):
+                self.domain.add_output(column, to_disk=is_disk)
+
+            self._output_data.set_data(
+                row=experiment_sample.job_number, value=value,
+                column=column)
+
+        self._jobs.mark(experiment_sample._jobnumber, status=Status.FINISHED)
+
+    @_access_file
+    def _write_experiment_sample(self,
+                                 experiment_sample: ExperimentSample) -> None:
+        """
+        Sets the ExperimentSample at the given index and writes the
+        result to the ExperimentData file on disk.
+
+        Parameters
+        ----------
+        experiment_sample : ExperimentSample
+            The ExperimentSample to set.
+        """
+        self._set_experiment_sample(experiment_sample)
+
+    def _access_open_job_data(self) -> ExperimentSample:
+        """Get the data of the first available open job.
+
+        Returns
+        -------
+        ExperimentSample
+            The ExperimentSample object of the first available open job.
+        """
+        job_index = self._jobs.get_open_job()
+        self._jobs.mark(job_index, status=Status.IN_PROGRESS)
+        experiment_sample = self.get_experiment_sample(job_index)
+        return experiment_sample
+
+    @_access_file
+    def _get_open_job_data(self) -> ExperimentSample:
+        """Get the data of the first available open job by
+        accessing the ExperimentData on disk.
+
+        Returns
+        -------
+        ExperimentSample
+            The ExperimentSample object of the first available open job.
+        """
+        return self._access_open_job_data()
+
+    # Jobs
+    # =========================================================================
+
+    def _set_error(self, index: int) -> None:
+        """Mark the experiment_sample at the given index as error.
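+
+        All output values of this experiment are overwritten with the
+        string 'ERROR'.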
+ + Parameters + ---------- + index + index of the experiment_sample to mark as error + """ + # self.jobs.mark_as_error(index) + self._jobs.mark(index, status=Status.ERROR) + self._output_data.set_data( + index, + value=['ERROR' for _ in self._output_data.names]) + + @_access_file + def _write_error(self, index: int): + """Mark the experiment_sample at the given index as + error and write to ExperimentData file. + + Parameters + ---------- + index + index of the experiment_sample to mark as error + """ + self._set_error(index) + + @_access_file + def is_all_finished(self) -> bool: + """Check if all jobs are finished + + Returns + ------- + bool + True if all jobs are finished, False otherwise + """ + return self._jobs.is_all_finished() + + def mark(self, indices: Iterable[int], + status: Literal['open', 'in progress', 'finished', 'error']): + """Mark the jobs at the given indices with the given status. + + Parameters + ---------- + indices : Iterable[int] + indices of the jobs to mark + status : Literal['open', 'in progress', 'finished', 'error'] + status to mark the jobs with: choose between: 'open', \ + 'in progress', 'finished' or 'error' + + Raises + ------ + ValueError + If the given status is not any of 'open', 'in progress', \ + 'finished' or 'error' + """ + # Check if the status is in Status + if not any(status.lower() == s.value for s in Status): + raise ValueError(f"Invalid status {status} given. " + f"\nChoose from values: " + f"{', '.join([s.value for s in Status])}") + + self._jobs.mark(indices, status) + + def mark_all(self, + status: Literal['open', 'in progress', 'finished', 'error']): + """Mark all the experiments with the given status + + Parameters + ---------- + status : Literal['open', 'in progress', 'finished', 'error'] + status to mark the jobs with: \ + choose between: + + * 'open', + * 'in progress', + * 'finished' + * 'error' + + Raises + ------ + ValueError + If the given status is not any of \ + 'open', 'in progress', 'finished' or 'error' + """ + self.mark(self._jobs.indices, status) + + def mark_all_error_open(self) -> None: + """ + Mark all the experiments that have the status 'error' open + """ + self._jobs.mark_all_error_open() + + def mark_all_in_progress_open(self) -> None: + """ + Mark all the experiments that have the status 'in progress' open + """ + self._jobs.mark_all_in_progress_open() + + def mark_all_nan_open(self) -> None: + """ + Mark all the experiments that have 'nan' in output open + """ + indices = self._output_data.get_index_with_nan() + self.mark(indices=indices, status='open') + # Datageneration + # ========================================================================= + + def evaluate(self, data_generator: DataGenerator, + mode: Literal['sequential', 'parallel', + 'cluster', 'cluster_parallel'] = 'sequential', + kwargs: Optional[dict] = None, + output_names: Optional[List[str]] = None) -> None: + """Run any function over the entirety of the experiments + + Parameters + ---------- + data_generator : DataGenerator + data generator to use + mode : str, optional + operational mode, by default 'sequential'. 
Choose between:
+
+            * 'sequential' : Run the operation sequentially
+            * 'parallel' : Run the operation on multiple cores
+            * 'cluster' : Run the operation on the cluster
+            * 'cluster_parallel' : Run the operation on the cluster
+              in parallel
+
+        kwargs : dict, optional
+            Any keyword arguments that need to
+            be supplied to the function, by default None
+        output_names : List[str], optional
+            If you provide a function as data generator, you have to provide
+            the names of all the output parameters that are in the return
+            statement, in order of appearance.
+
+        Raises
+        ------
+        ValueError
+            Raised when an invalid parallelization mode is specified
+        """
+        if kwargs is None:
+            kwargs = {}
+
+        if inspect.isfunction(data_generator):
+            if output_names is None:
+                raise TypeError(
+                    ("If you provide a function as data generator, you have "
+                     "to provide the names of the return arguments with the "
+                     "output_names attribute."))
+            data_generator = convert_function(
+                f=data_generator, output=output_names)
+
+        elif isinstance(data_generator, str):
+            data_generator = _datagenerator_factory(
+                data_generator, self.domain, kwargs)
+
+        if mode.lower() == "sequential":
+            return self._run_sequential(data_generator, kwargs)
+        elif mode.lower() == "parallel":
+            return self._run_multiprocessing(data_generator, kwargs)
+        elif mode.lower() == "cluster":
+            return self._run_cluster(data_generator, kwargs)
+        elif mode.lower() == "cluster_parallel":
+            return self._run_cluster_parallel(data_generator, kwargs)
+        else:
+            raise ValueError("Invalid parallelization mode specified.")
+
+    def _run_sequential(self, data_generator: DataGenerator, kwargs: dict):
+        """Run the operation sequentially
+
+        Parameters
+        ----------
+        data_generator : DataGenerator
+            data generator that is run for every open entry in the
+            ExperimentData object
+        kwargs : dict
+            Any keyword arguments that need to be supplied to the function
+
+        Raises
+        ------
+        NoOpenJobsError
+            Raised when there are no open jobs left
+        """
+        while True:
+            try:
+                experiment_sample = self._access_open_job_data()
+                logger.debug(
+                    f"Accessed experiment_sample "
+                    f"{experiment_sample._jobnumber}")
+            except NoOpenJobsError:
+                logger.debug("No Open Jobs left")
+                break
+
+            try:
+
+                # If kwargs is empty dict
+                if not kwargs:
+                    logger.debug(
+                        f"Running experiment_sample "
+                        f"{experiment_sample._jobnumber}")
+                else:
+                    logger.debug(
+                        f"Running experiment_sample "
+                        f"{experiment_sample._jobnumber} with kwargs {kwargs}")
+
+                _experiment_sample = data_generator._run(
+                    experiment_sample, **kwargs)  # no *args!
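+                # Store the returned sample and mark this job as finished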
+                self._set_experiment_sample(_experiment_sample)
+            except Exception as e:
+                error_msg = (f"Error in experiment_sample "
+                             f"{experiment_sample._jobnumber}: {e}")
+                error_traceback = traceback.format_exc()
+                logger.error(f"{error_msg}\n{error_traceback}")
+                self._set_error(experiment_sample._jobnumber)
+
+    def _run_multiprocessing(self, data_generator: DataGenerator,
+                             kwargs: dict):
+        """Run the operation on multiple cores
+
+        Parameters
+        ----------
+        data_generator : DataGenerator
+            data generator that is run for every open entry in the
+            ExperimentData object
+        kwargs : dict
+            Any keyword arguments that need to be supplied to the function
+
+        Raises
+        ------
+        NoOpenJobsError
+            Raised when there are no open jobs left
+        """
+        # Get all the jobs
+        options = []
+        while True:
+            try:
+                experiment_sample = self._access_open_job_data()
+                options.append(
+                    ({'experiment_sample': experiment_sample, **kwargs},))
+            except NoOpenJobsError:
+                break
+
+        def f(options: Dict[str, Any]) -> Tuple[ExperimentSample, int]:
+            try:
+
+                logger.debug(
+                    f"Running experiment_sample "
+                    f"{options['experiment_sample'].job_number}")
+
+                return (data_generator._run(**options), 0)  # no *args!
+
+            except Exception as e:
+                error_msg = (f"Error in experiment_sample "
+                             f"{options['experiment_sample'].job_number}: "
+                             f"{e}")
+                error_traceback = traceback.format_exc()
+                logger.error(f"{error_msg}\n{error_traceback}")
+                return (options['experiment_sample'], 1)
+
+        with mp.Pool() as pool:
+            # maybe implement pool.starmap_async ?
+            _experiment_samples: List[
+                Tuple[ExperimentSample, int]] = pool.starmap(f, options)
+
+        for _experiment_sample, exit_code in _experiment_samples:
+            if exit_code == 0:
+                self._set_experiment_sample(_experiment_sample)
+            else:
+                self._set_error(_experiment_sample.job_number)
+
+    def _run_cluster(self, data_generator: DataGenerator, kwargs: dict):
+        """Run the operation on the cluster
+
+        Parameters
+        ----------
+        data_generator : DataGenerator
+            data generator that is run for every open entry in the
+            ExperimentData object
+        kwargs : dict
+            Any keyword arguments that need to be supplied to the function
+
+        Raises
+        ------
+        NoOpenJobsError
+            Raised when there are no open jobs left
+        """
+        # Retrieve the updated experimentdata object from disc
+        try:
+            self = self.from_file(self.project_dir)
+        except FileNotFoundError:  # If not found, store current
+            self.store()
+
+        while True:
+            try:
+                experiment_sample = self._get_open_job_data()
+            except NoOpenJobsError:
+                logger.debug("No Open jobs left!")
+                break
+
+            try:
+                _experiment_sample = data_generator._run(
+                    experiment_sample, **kwargs)
+                self._write_experiment_sample(_experiment_sample)
+            except Exception:
+                n = experiment_sample.job_number
+                error_msg = f"Error in experiment_sample {n}: "
+                error_traceback = traceback.format_exc()
+                logger.error(f"{error_msg}\n{error_traceback}")
+                self._write_error(experiment_sample._jobnumber)
+                continue
+
+        self = self.from_file(self.project_dir)
+        # Remove the lockfile from disk
+        (self.project_dir / EXPERIMENTDATA_SUBFOLDER / LOCK_FILENAME
+         ).with_suffix('.lock').unlink(missing_ok=True)
+
+    def _run_cluster_parallel(
+            self, data_generator: DataGenerator, kwargs: dict):
+        """Run the operation on the cluster and parallelize it over cores
+
+        Parameters
+        ----------
+        data_generator : DataGenerator
+            data generator that is run for every open entry in the
+            ExperimentData object
+        kwargs : dict
+            Any keyword arguments that need to be supplied to the function
+
+        Raises
+        ------
+        NoOpenJobsError
+            Raised when there are no open jobs left
+        """
+        # 
Retrieve the updated experimentdata object from disc + try: + self = self.from_file(self.project_dir) + except FileNotFoundError: # If not found, store current + self.store() + + no_jobs = False + + while True: + es_list = [] + for core in range(mp.cpu_count()): + try: + es_list.append(self._get_open_job_data()) + except NoOpenJobsError: + logger.debug("No Open jobs left!") + no_jobs = True + break + + d = self.select([e.job_number for e in es_list]) + + d._run_multiprocessing( + data_generator=data_generator, kwargs=kwargs) + + # TODO access resource first! + self.overwrite_disk( + indices=d.index, input_data=d._input_data, + output_data=d._output_data, jobs=d._jobs, + domain=d.domain, add_if_not_exist=False) + + if no_jobs: + break + + self = self.from_file(self.project_dir) + # Remove the lockfile from disk + (self.project_dir / EXPERIMENTDATA_SUBFOLDER / LOCK_FILENAME + ).with_suffix('.lock').unlink(missing_ok=True) + + # Optimization + # ========================================================================= + + def optimize(self, optimizer: Optimizer | str, + data_generator: DataGenerator | str, + iterations: int, + kwargs: Optional[Dict[str, Any]] = None, + hyperparameters: Optional[Dict[str, Any]] = None, + x0_selection: Literal['best', 'random', + 'last', + 'new'] | ExperimentData = 'best', + sampler: Optional[Sampler | str] = 'random', + overwrite: bool = False, + callback: Optional[Callable] = None) -> None: + """Optimize the experimentdata object + + Parameters + ---------- + optimizer : Optimizer | str + Optimizer object + data_generator : DataGenerator | str + DataGenerator object + iterations : int + number of iterations + kwargs : Dict[str, Any], optional + any additional keyword arguments that will be passed to + the DataGenerator + hyperparameters : Dict[str, Any], optional + any additional keyword arguments that will be passed to + the optimizer + x0_selection : str | ExperimentData + How to select the initial design. By default 'best' + The following x0_selections are available: + + * 'best': Select the best designs from the current experimentdata + * 'random': Select random designs from the current experimentdata + * 'last': Select the last designs from the current experimentdata + * 'new': Create new random designs from the current experimentdata + + If the x0_selection is 'new', new designs are sampled with the + sampler provided. The number of designs selected is equal to the + population size of the optimizer. + + If an ExperimentData object is passed as x0_selection, + the optimizer will use the input_data and output_data from this + object as initial samples. + sampler: Sampler, optional + If x0_selection = 'new', the sampler to use. By default 'random' + overwrite: bool, optional + If True, the optimizer will overwrite the current data. By default + False + callback : Callable, optional + A callback function that is called after every iteration. It has + the following signature: + + ``callback(intermediate_result: ExperimentData)`` + + where the first argument is a parameter containing an + `ExperimentData` object with the current iterate(s). 
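+
+            A minimal sketch of such a callback, assuming you only want
+            to log the number of evaluated samples:
+
+            ``callback=lambda data: print(len(data))``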
+
+        Raises
+        ------
+        ValueError
+            Raised when an invalid x0_selection is specified
+        """
+        # Create the data generator object if a string reference is passed
+        if isinstance(data_generator, str):
+            data_generator: DataGenerator = _datagenerator_factory(
+                data_generator=data_generator,
+                domain=self.domain, kwargs=kwargs)
+
+        # Create a copy of the optimizer object
+        _optimizer = copy(optimizer)
+
+        # Create the optimizer object if a string reference is passed
+        if isinstance(_optimizer, str):
+            _optimizer: Optimizer = _optimizer_factory(
+                _optimizer, self.domain, hyperparameters)
+
+        # Create the sampler object if a string reference is passed
+        if isinstance(sampler, str):
+            sampler: Sampler = _sampler_factory(sampler, self.domain)
+
+        if _optimizer.type == 'scipy':
+            self._iterate_scipy(
+                optimizer=_optimizer, data_generator=data_generator,
+                iterations=iterations, kwargs=kwargs,
+                x0_selection=x0_selection,
+                sampler=sampler,
+                overwrite=overwrite,
+                callback=callback)
+        else:
+            self._iterate(
+                optimizer=_optimizer, data_generator=data_generator,
+                iterations=iterations, kwargs=kwargs,
+                x0_selection=x0_selection,
+                sampler=sampler,
+                overwrite=overwrite,
+                callback=callback)
+
+    def _iterate(self, optimizer: Optimizer, data_generator: DataGenerator,
+                 iterations: int, kwargs: Dict[str, Any], x0_selection: str,
+                 sampler: Sampler, overwrite: bool,
+                 callback: Callable):
+        """Internal implementation of the iteration process
+
+        Parameters
+        ----------
+        optimizer : Optimizer
+            Optimizer object
+        data_generator : DataGenerator
+            DataGenerator object
+        iterations : int
+            number of iterations
+        kwargs : Dict[str, Any]
+            any additional keyword arguments that will be passed to
+            the DataGenerator
+        x0_selection : str | ExperimentData
+            How to select the initial design.
+            The following x0_selections are available:
+
+            * 'best': Select the best designs from the current experimentdata
+            * 'random': Select random designs from the current experimentdata
+            * 'last': Select the last designs from the current experimentdata
+            * 'new': Create new random designs from the current experimentdata
+
+            If the x0_selection is 'new', new designs are sampled with the
+            sampler provided. The number of designs selected is equal to the
+            population size of the optimizer.
+
+            If an ExperimentData object is passed as x0_selection,
+            the optimizer will use the input_data and output_data from this
+            object as initial samples.
+
+        sampler: Sampler
+            If x0_selection = 'new', the sampler to use
+        overwrite: bool
+            If True, the optimizer will overwrite the current data.
+        callback : Callable
+            A callback function that is called after every iteration. It has
+            the following signature:
+
+            ``callback(intermediate_result: ExperimentData)``
+
+            where the first argument is a parameter containing an
+            `ExperimentData` object with the current iterate(s).
+
+        Raises
+        ------
+        ValueError
+            Raised when an invalid x0_selection is specified
+        """
+        last_index = self.index[-1] if not self.index.empty else -1
+
+        if isinstance(x0_selection, str):
+            if x0_selection == 'new':
+
+                if iterations < optimizer._population:
+                    raise ValueError(
+                        f'For creating new samples, the total number of '
+                        f'requested iterations ({iterations}) cannot be '
+                        f'smaller than the population size '
+                        f'({optimizer._population})')
+
+                init_samples = ExperimentData.from_sampling(
+                    domain=self.domain,
+                    sampler=sampler,
+                    n_samples=optimizer._population,
+                    seed=optimizer._seed)
+
+                init_samples.evaluate(
+                    data_generator=data_generator, kwargs=kwargs,
+                    mode='sequential')
+
+                if callback is not None:
+                    callback(init_samples)
+
+                if overwrite:
+                    _indices = init_samples.index + last_index + 1
+                    self._overwrite_experiments(
+                        experiment_sample=init_samples,
+                        indices=_indices,
+                        add_if_not_exist=True)
+
+                else:
+                    self.add_experiments(init_samples)
+
+                x0_selection = 'last'
+                iterations -= optimizer._population
+
+        x0 = x0_factory(experiment_data=self, mode=x0_selection,
+                        n_samples=optimizer._population)
+        optimizer._set_data(x0)
+
+        optimizer._check_number_of_datapoints()
+
+        optimizer._construct_model(data_generator)
+
+        for _ in range(number_of_updates(
+                iterations,
+                population=optimizer._population)):
+            new_samples = optimizer.update_step(data_generator)
+
+            # If new_samples is a tuple of input_data and output_data
+            if isinstance(new_samples, tuple):
+                new_samples = ExperimentData(
+                    domain=self.domain,
+                    input_data=new_samples[0],
+                    output_data=new_samples[1],
+                )
+            # If applicable, evaluate the new designs:
+            new_samples.evaluate(
+                data_generator, mode='sequential', kwargs=kwargs)
+
+            if callback is not None:
+                callback(new_samples)
+
+            if overwrite:
+                _indices = new_samples.index + last_index + 1
+                self._overwrite_experiments(experiment_sample=new_samples,
+                                            indices=_indices,
+                                            add_if_not_exist=True)
+
+            else:
+                self.add_experiments(new_samples)
+
+            optimizer._set_data(self)
+
+        if not overwrite:
+            # Remove overiterations
+            self.remove_rows_bottom(number_of_overiterations(
+                iterations,
+                population=optimizer._population))
+
+        # Reset the optimizer
+        # optimizer.reset(ExperimentData(domain=self.domain))
+
+    def _iterate_scipy(self, optimizer: Optimizer,
+                       data_generator: DataGenerator,
+                       iterations: int, kwargs: dict,
+                       x0_selection: str | ExperimentData,
+                       sampler: Sampler, overwrite: bool,
+                       callback: Callable):
+        """Internal implementation of the iteration process for
+        scipy-minimize optimizers.
+
+        Parameters
+        ----------
+        optimizer : Optimizer
+            Optimizer object
+        data_generator : DataGenerator
+            DataGenerator object
+        iterations : int
+            number of iterations
+        kwargs : Dict[str, Any]
+            any additional keyword arguments that will be passed to
+            the DataGenerator
+        x0_selection : str | ExperimentData
+            How to select the initial design.
+            The following x0_selections are available:
+
+            * 'best': Select the best designs from the current experimentdata
+            * 'random': Select random designs from the current experimentdata
+            * 'last': Select the last designs from the current experimentdata
+            * 'new': Create new random designs from the current experimentdata
+
+            If the x0_selection is 'new', new designs are sampled with the
+            sampler provided. The number of designs selected is equal to the
+            population size of the optimizer.
+ + If an ExperimentData object is passed as x0_selection, + the optimizer will use the input_data and output_data from this + object as initial samples. + + sampler: Sampler + If x0_selection = 'new', the sampler to use + overwrite: bool + If True, the optimizer will overwrite the current data. + callback : Callable + A callback function that is called after every iteration. It has + the following signature: + + ``callback(intermediate_result: ExperimentData)`` + + where the first argument is a parameter containing an + `ExperimentData` object with the current iterate(s). + + Raises + ------ + ValueError + Raised when invalid x0_selection is specified + """ + last_index = self.index[-1] if not self.index.empty else -1 + n_data_before_iterate = len(self) + + if isinstance(x0_selection, str): + if x0_selection == 'new': + + if iterations < optimizer._population: + raise ValueError( + f'For creating new samples, the total number of ' + f'requested iterations ({iterations}) cannot be ' + f'smaller than the population size ' + f'({optimizer._population})') + + init_samples = ExperimentData.from_sampling( + domain=self.domain, + sampler=sampler, + n_samples=optimizer._population, + seed=optimizer._seed) + + init_samples.evaluate( + data_generator=data_generator, kwargs=kwargs, + mode='sequential') + + if callback is not None: + callback(init_samples) + + if overwrite: + _indices = init_samples.index + last_index + 1 + self._overwrite_experiments( + experiment_sample=init_samples, + indices=_indices, + add_if_not_exist=True) + + else: + self.add_experiments(init_samples) + + x0_selection = 'last' + + x0 = x0_factory(experiment_data=self, mode=x0_selection, + n_samples=optimizer._population) + optimizer._set_data(x0) + + optimizer._check_number_of_datapoints() + + optimizer.run_algorithm(iterations, data_generator) + + new_samples: ExperimentData = optimizer.data.select( + optimizer.data.index[1:]) + new_samples.evaluate(data_generator, mode='sequential', kwargs=kwargs) + + if callback is not None: + callback(new_samples) + + if overwrite: + self.add_experiments( + optimizer.data.select([optimizer.data.index[-1]])) + + elif not overwrite: + # Do not add the first element, as this is already + # in the sampled data + self.add_experiments(new_samples) + + # TODO: At the end, the data should have + # n_data_before_iterate + iterations amount of elements! 
+ # If x_new is empty, repeat best x0 to fill up total iteration + if len(self) == n_data_before_iterate: + repeated_sample = self.get_n_best_output( + n_samples=1) + + for repetition in range(iterations): + self.add_experiments(repeated_sample) + + # Repeat last iteration to fill up total iteration + if len(self) < n_data_before_iterate + iterations: + last_design = self.get_experiment_sample(len(self)-1) + + while len(self) < n_data_before_iterate + iterations: + self.add_experiments(last_design) + + # Evaluate the function on the extra iterations + self.evaluate(data_generator, mode='sequential', kwargs=kwargs) + + # Reset the optimizer + # optimizer.reset(ExperimentData(domain=self.domain)) + + # Sampling + # ========================================================================= + + def sample(self, sampler: Sampler | SamplerNames, n_samples: int = 1, + seed: Optional[int] = None, **kwargs) -> None: + """Sample data from the domain providing the sampler strategy + + Parameters + ---------- + sampler: Sampler | str + Sampler callable or string of built-in sampler + If a string is passed, it should be one of the built-in samplers: + + * 'random' : Random sampling + * 'latin' : Latin Hypercube Sampling + * 'sobol' : Sobol Sequence Sampling + * 'grid' : Grid Search Sampling + n_samples : int, optional + Number of samples to generate, by default 1 + seed : Optional[int], optional + Seed to use for the sampler, by default None + + Note + ---- + When using the 'grid' sampler, an optional argument + 'stepsize_continuous_parameters' can be passed to specify the stepsize + to cast continuous parameters to discrete parameters. + + - The stepsize should be a dictionary with the parameter names as keys\ + and the stepsize as values. + - Alternatively, a single stepsize can be passed for all continuous\ + parameters. + + Raises + ------ + ValueError + Raised when invalid sampler type is specified + """ + + if isinstance(sampler, str): + sampler = _sampler_factory(sampler, self.domain) + + sample_data: DataTypes = sampler( + domain=self.domain, n_samples=n_samples, seed=seed, **kwargs) + self.add(input_data=sample_data, domain=self.domain) + + # Project directory + # ========================================================================= + + def set_project_dir(self, project_dir: Path | str): + """Set the directory of the f3dasm project folder. 
+
+        Parameters
+        ----------
+        project_dir : Path or str
+            Path to the project directory
+        """
+        self.project_dir = _project_dir_factory(project_dir)
+
+
+def x0_factory(experiment_data: ExperimentData,
+               mode: str | ExperimentData, n_samples: int):
+    """Set the initial population to the best n samples of the given data
+
+    Parameters
+    ----------
+    experiment_data : ExperimentData
+        Data to be used for the initial population
+    mode : str | ExperimentData
+        Mode of selecting the initial population.
+        The following modes are available:
+
+        - best: select the best n samples
+        - random: select n random samples
+        - last: select the last n samples
+
+        If an ExperimentData object is passed, it is used directly as
+        the initial population.
+    n_samples : int
+        Number of samples to select
+
+    Raises
+    ------
+    ValueError
+        Raised when the mode is not recognized
+    """
+    if isinstance(mode, ExperimentData):
+        x0 = mode
+
+    elif mode == 'best':
+        x0 = experiment_data.get_n_best_output(n_samples)
+
+    elif mode == 'random':
+        x0 = experiment_data.select(
+            np.random.choice(
+                experiment_data.index,
+                size=n_samples, replace=False))
+
+    elif mode == 'last':
+        x0 = experiment_data.select(
+            experiment_data.index[-n_samples:])
+
+    else:
+        raise ValueError(
+            f'Unknown selection mode {mode}, use best, random or last')
+
+    x0._reset_index()
+    return x0
+
+
+def combine_data_to_multiindex(
+        experiment_data: ExperimentData) -> pd.DataFrame:
+    """Combine the data to a multiindex dataframe.
+
+    Parameters
+    ----------
+    experiment_data: ExperimentData
+        The ExperimentData object to combine
+
+    Returns
+    -------
+    pd.DataFrame
+        The combined dataframe.
+
+    Note
+    ----
+    This function is mainly used to show the combined ExperimentData
+    object in a Jupyter Notebook
+    """
+    return pd.concat(
+        [experiment_data._jobs.to_dataframe(),
+         experiment_data._input_data.to_dataframe(),
+         experiment_data._output_data.to_dataframe()],
+        axis=1, keys=['jobs', 'input', 'output'])
diff --git a/src/f3dasm/_src/experimentdata/_jobqueue.py b/src/f3dasm/_src/experimentdata/_jobqueue.py
index 438b6c4d..79264ce1 100644
--- a/src/f3dasm/_src/experimentdata/_jobqueue.py
+++ b/src/f3dasm/_src/experimentdata/_jobqueue.py
@@ -91,7 +91,7 @@ def __add__(self, other: _JobQueue | str) -> _JobQueue:
         other_jobs_copy.index = other_jobs_copy.index + last_index + 1
         return _JobQueue(pd.concat([self.jobs, other_jobs_copy]))
 
-    def __getitem__(self, index: int | slice | Iterable[int]) -> _Data:
+    def __getitem__(self, index: int | slice | Iterable[int]) -> _JobQueue:
         """Get a subset of the data.
 
         Parameters
@@ -163,6 +163,7 @@ def from_file(cls: Type[_JobQueue], filename: Path | str) -> _JobQueue:
 
         return cls(pd.read_pickle(filename))
 
+    # TODO: This function is not used!
     def reset(self) -> None:
         """Resets the job queue."""
         self.jobs = pd.Series(dtype='string')
@@ -230,6 +231,7 @@ def remove(self, indices: List[int]):
         """
         self.jobs = self.jobs.drop(indices)
 
+    # TODO: Remove this method as it is not used!
     def add(self, number_of_jobs: int = 1, status: str = Status.OPEN):
         """Adds a number of jobs to the job queue.
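
Usage note: the evaluate() API above accepts a plain Python function as
data generator, provided output_names lists the names of the returned
values. A minimal sketch of the intended workflow follows; the import path
of ExperimentData and the keyword arguments of Domain.add for a float
parameter are assumptions, not verified against the f3dasm documentation:

    from f3dasm import ExperimentData  # assumed import path
    from f3dasm.design import Domain

    domain = Domain()
    # 'low' and 'high' are assumed keywords for a float parameter
    domain.add(name='x', type='float', low=0.0, high=1.0)

    data = ExperimentData(domain=domain)
    data.sample(sampler='random', n_samples=10, seed=42)

    def f(x: float) -> float:
        # the argument name is assumed to match the input column 'x'
        return x ** 2

    # output_names maps the returned value(s) to output column(s),
    # in order of appearance in the return statement
    data.evaluate(data_generator=f, mode='sequential', output_names=['y'])

Jobs that raised an exception are marked 'error'; they can be re-opened
with mark_all_error_open() and evaluated again.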
From c4c8a76fc9dccb300d5ef50fbb1fffd40921b014 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Mon, 24 Jun 2024 09:30:33 +0200 Subject: [PATCH 08/17] remove commented code --- .../_experimental/_newexperimentdata2.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py index cb776297..9e762296 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py @@ -132,14 +132,6 @@ def __init__(self, self._jobs = _jobs_factory( jobs, self._input_data, self._output_data, job_value) - # # Check if the columns of input_data are in the domain - # if not self._input_data.columns.has_columnnames(self.domain.names): - # self._input_data.columns.set_columnnames(self.domain.names) - - # if not self._output_data.columns.has_columnnames( - # self.domain.output_names): - # self._output_data.columns.set_columnnames(self.domain.output_names) - # For backwards compatibility; if the output_data has # only one column, rename it to 'y' # TODO: Fix this for newdata2 @@ -253,11 +245,6 @@ def index(self) -> pd.Index: """ return self._jobs.indices - # if self._input_data.is_empty(): - # return self._output_data.indices - - # return self._input_data.indices - # Alternative Constructors # ========================================================================= From 9be592857135de2ed3eb607b4084405a7e383e29 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:26:44 +0200 Subject: [PATCH 09/17] Added docstings and tests for _newdata2 object --- .../experimentdata/_experimental/_newdata2.py | 389 +++++++++++++++-- .../_experimental/_newexperimentdata2.py | 5 - tests/newdata/conftest.py | 33 +- tests/newdata/test_data.py | 391 +++++++++--------- 4 files changed, 559 insertions(+), 259 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py index 759473e6..4bff29cd 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py @@ -24,25 +24,76 @@ MISSING_VALUE = np.nan +# ============================================================================= + class _Data: def __init__(self, data: Dict[int, Dict[str, Any]] = None): + """ + Initialize the _Data object. + + Parameters + ---------- + data : Dict[int, Dict[str, Any]], optional + The data dictionary with integer keys and dictionaries as values. + """ self.data = data if data is not None else {} def __len__(self) -> int: + """ + Get the number of items in the data. + + Returns + ------- + int + Number of items in the data. + """ return len(self.data) def __iter__(self): + """ + Get an iterator over the data values. + + Returns + ------- + iterator + Iterator over the data values. + """ return iter(self.data.values()) def __getitem__(self, rows: int | slice | Iterable[int]) -> _Data: - + """ + Get a subset of the data. + + Parameters + ---------- + rows : int or slice or Iterable[int] + The rows to retrieve. + + Returns + ------- + _Data + The subset of the data. 
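+
+        Rows that are not present in the data are returned as empty
+        dictionaries.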
+ """ if isinstance(rows, int): rows = [rows] return _Data({row: self.data.get(row, {}) for row in rows}) def __add__(self, __o: _Data) -> _Data: + """ + Add another _Data object to this one. + + Parameters + ---------- + __o : _Data + The other _Data object. + + Returns + ------- + _Data + The combined _Data object. + """ if self.is_empty(): return __o @@ -56,78 +107,254 @@ def __add__(self, __o: _Data) -> _Data: return _data_copy def __eq__(self, __o: _Data) -> bool: + """ + Check if another _Data object is equal to this one. + + Parameters + ---------- + __o : _Data + The other _Data object. + + Returns + ------- + bool + True if the objects are equal, False otherwise. + """ return self.data == __o.data def _repr_html_(self) -> str: + """ + Get the HTML representation of the data. + + Returns + ------- + str + The HTML representation of the data. + """ return self.to_dataframe()._repr_html_() def __repr__(self) -> str: + """ + Get the string representation of the data. + + Returns + ------- + str + The string representation of the data. + """ return self.to_dataframe().__repr__() + +# Properties +# ============================================================================= + @property def indices(self) -> List[int]: + """ + Get the indices of the data. + + Returns + ------- + List[int] + The list of indices. + """ return list(self.data.keys()) @property def names(self) -> List[str]: + """ + Get the column names of the data. + + Returns + ------- + List[str] + The list of column names. + """ return self.to_dataframe().columns.tolist() + def is_empty(self) -> bool: + """ + Check if the data is empty. + + Returns + ------- + bool + True if the data is empty, False otherwise. + """ + return not bool(self.data) + + +# Initialization +# ============================================================================= + @classmethod - def from_indices(cls, rows: Iterable[int]): + def from_indices(cls, rows: Iterable[int]) -> _Data: + """ + Create a _Data object from a list of indices. + + Parameters + ---------- + rows : Iterable[int] + The indices to create the _Data object from. + + Returns + ------- + _Data + The created _Data object. + """ return cls({row: {} for row in rows}) - # @classmethod - # def from_domain(cls, space: Iterable[str]): - # return cls(None) - @classmethod def from_file(cls, filename: Path) -> _Data: + """ + Create a _Data object from a file. + + Parameters + ---------- + filename : Path + The file to read the data from. + + Returns + ------- + _Data + The created _Data object. + """ ... @classmethod def from_numpy(cls: Type[_Data], array: np.ndarray, keys: Optional[Iterable[str]] = None) -> _Data: + """ + Create a _Data object from a numpy array. + + Parameters + ---------- + array : np.ndarray + The numpy array to create the _Data object from. + keys : Optional[Iterable[str]], optional + The keys for the columns of the data. + + Returns + ------- + _Data + The created _Data object. + """ if keys is not None: return _Data( {index: {key: col for key, col in zip(keys, row) } for index, row in enumerate(array)}) else: - # Look out! i is now an integer key! return _Data( {index: {i: col for i, col in enumerate(row) } for index, row in enumerate(array)}) @classmethod def from_dataframe(cls, df: pd.DataFrame) -> _Data: + """ + Create a _Data object from a pandas DataFrame. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame to create the _Data object from. + + Returns + ------- + _Data + The created _Data object. 
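+
+        Note
+        ----
+        The index of the DataFrame is discarded: rows are re-keyed with
+        consecutive integers starting at 0.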
+ """ return _Data( {index: row.to_dict() for index, (_, row) in enumerate(df.iterrows())}) +# Exporting +# ============================================================================= + def to_numpy(self) -> np.ndarray: + """ + Convert the data to a numpy array. + + Returns + ------- + np.ndarray + The numpy array representation of the data. + """ return self.to_dataframe().to_numpy() def to_xarray(self, label: str): + """ + Convert the data to an xarray DataArray. + + Parameters + ---------- + label : str + The label for the xarray DataArray. + + Returns + ------- + xr.DataArray + The xarray DataArray representation of the data. + """ df = self.to_dataframe() - # Can create the xarray with the information from the domain! return xr.DataArray( self.to_dataframe(), dims=['iterations', label], coords={ 'iterations': df.index, label: df.columns}) def to_dataframe(self) -> pd.DataFrame: - # Can create the dataframe from the numpy array + column names!! + """ + Convert the data to a pandas DataFrame. + + Returns + ------- + pd.DataFrame + The DataFrame representation of the data. + """ return pd.DataFrame(self.data).T def store(self, filename: Path): + """ + Store the data to a file. + + Parameters + ---------- + filename : Path + The file to store the data in. + """ ... - def n_best_samples(self, nosamples: int, key: str) -> _Data: - df = self.to_dataframe() - return df.nsmallest( - n=nosamples, columns=key) + def get_data_dict(self, row: int) -> Dict[str, Any]: + """ + Get the data dictionary for a specific row. + + Parameters + ---------- + row : int + The row to retrieve the data from. + + Returns + ------- + Dict[str, Any] + The data dictionary for the specified row. + """ + return self.data[row] - def select_columns(self, keys: Iterable[str] | str) -> _Data: - # This only works for single ints or slices!! +# Selecting and combining +# ============================================================================= + def select_columns(self, keys: Iterable[str] | str) -> _Data: + """ + Select specific columns from the data. + + Parameters + ---------- + keys : Iterable[str] or str + The keys of the columns to select. + + Returns + ------- + _Data + The _Data object with only the selected columns. + """ if isinstance(keys, str): keys = [keys] @@ -136,47 +363,135 @@ def select_columns(self, keys: Iterable[str] | str) -> _Data: for index, row in self.data.items()}) def drop(self, keys: Iterable[str] | str) -> _Data: - # Might be depreciated? - + """ + Drop specific columns from the data. + + Parameters + ---------- + keys : Iterable[str] or str + The keys of the columns to drop. + + Returns + ------- + _Data + The _Data object with the specified columns removed. + """ if isinstance(keys, str): keys = [keys] - for row in self.data: + for row in self: for key in keys: if key in row: - del self.data[row][key] + del row[key] + + def join(self, __o: _Data) -> _Data: + """ + Join another _Data object with this one. + + Parameters + ---------- + __o : _Data + The other _Data object to join with this one. + + Returns + ------- + _Data + The combined _Data object. + """ + _data = deepcopy(self) + for row, other_row in zip(_data, __o): + row.update(other_row) + + return _data + +# Modifying +# ============================================================================= + + def n_best_samples(self, nosamples: int, key: str) -> pd.DataFrame: + """ + Get the top N samples based on a specific key. + + Parameters + ---------- + nosamples : int + The number of samples to retrieve. 
+ key : str + The key to sort the samples by. + + Returns + ------- + pd.DataFrame + The DataFrame with the top N samples. + """ + df = self.to_dataframe() + return df.nsmallest(n=nosamples, columns=key) def add_column(self, key: str): + """ + Add a new column to the data with missing values. + + Parameters + ---------- + key : str + The key for the new column. + """ for row in self.data: self.data[row][key] = MISSING_VALUE def remove(self, rows: Iterable[int]): + """ + Remove specific rows from the data. + + Parameters + ---------- + rows : Iterable[int] + The rows to remove. + """ for row in rows: - del self.data[row] # = deleting the row + del self.data[row] def overwrite(self, rows: Iterable[int], __o: _Data): + """ + Overwrite specific rows with data from another _Data object. + + Parameters + ---------- + rows : Iterable[int] + The rows to overwrite. + __o : _Data + The _Data object to overwrite the rows with. + """ for index, other_row in zip(rows, __o): self.data[index] = other_row - def join(self, __o: _Data) -> _Data: - _data = deepcopy(self) - for row, other_row in zip(_data, __o): - row.update(other_row) - - return _Data(_data) - - def get_data_dict(self, row: int) -> Dict[str, Any]: - return self.data[row] - def set_data(self, row: int, value: Any, key: str): + """ + Set a specific value in the data. + + Parameters + ---------- + row : int + The row to set the value in. + value : Any + The value to set. + key : str + The key for the value. + """ self.data[row][key] = value def reset_index(self, rows: Iterable[int] = None): - self.data = {index: values for index, values in enumerate(self.data) - } + """ + Reset the index of the data. - def is_empty(self) -> bool: - return not bool(self.data) + Parameters + ---------- + rows : Iterable[int], optional + The rows to reset the index for. + + """ + self.data = {index: values for index, values in enumerate(self)} + +# ============================================================================= def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: @@ -193,7 +508,9 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: _Data The data object. 
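+
+    For example, ``{'a': 1}`` becomes ``_Data({0: {'a': 1}})``.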
""" - return _Data({0: {dictionary}}) + return _Data({0: dictionary}) + +# ============================================================================= def _data_factory(data: DataTypes) -> _Data: @@ -217,5 +534,7 @@ def _data_factory(data: DataTypes) -> _Data: f"Data must be of type _Data, pd.DataFrame, np.ndarray, " f"Path or str, not {type(data)}") +# ============================================================================= + DataTypes = Union[pd.DataFrame, np.ndarray, Path, str, _Data] diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py index 9e762296..0a2cc770 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py @@ -142,11 +142,6 @@ def __len__(self): """The len() method returns the number of datapoints""" return len(self._jobs) - # if self._input_data.is_empty(): - # return len(self._output_data) - - # return len(self._input_data) - def __iter__(self) -> Iterator[Tuple[Dict[str, Any]]]: self.current_index = 0 return self diff --git a/tests/newdata/conftest.py b/tests/newdata/conftest.py index be072701..acde62e9 100644 --- a/tests/newdata/conftest.py +++ b/tests/newdata/conftest.py @@ -1,40 +1,31 @@ import numpy as np import pytest -from f3dasm._src.experimentdata._columns import _Columns -from f3dasm._src.experimentdata._newdata import _Index +from f3dasm._src.experimentdata._experimental._newdata2 import _Data from f3dasm.design import Domain @pytest.fixture(scope="package") def list_1(): - return [[np.array([0.3, 5.0, 0.34]), 'd', 3], [np.array( - [0.23, 5.0, 0.0]), 'f', 4], [np.array([0.3, 5.0, 0.2]), 'c', 0]] - - -@pytest.fixture(scope="package") -def columns_1(): - return _Columns({'a': None, 'b': None, 'c': None}) - - -@pytest.fixture(scope="package") -def indices_1(): - return _Index([3, 5, 6]) + return {0: {'a': np.array([0.3, 5.0, 0.34]), 'b': 'd', 'c': 3}, + 1: {'a': np.array([0.23, 5.0, 0.0]), 'b': 'f', 'c': 4}, + 2: {'a': np.array([0.3, 5.0, 0.2]), 'b': 'c', 'c': 0} + } @pytest.fixture(scope="package") def list_2(): - return [[np.array([0.3, 0.2])], [np.array([0.4, 0.3])], [np.array([0.0, 1.0])]] - - -@pytest.fixture(scope="package") -def columns_2(): - return _Columns({'a': None}) + return {0: {'a': np.array([0.3, 0.2])}, + 1: {'a': np.array([0.4, 0.3]), 'b': np.array([0.0, 1.0])} + } @pytest.fixture(scope="package") def list_3(): - return [[np.array([1.1, 0.2])], [np.array([8.9, 0.3])], [np.array([0.0, 0.87])]] + return {0: {'a': np.array([1.1, 0.2])}, + 1: {'a': np.array([8.9, 0.3])}, + 2: {'a': np.array([0.0, 0.87])} + } @pytest.fixture(scope="package") diff --git a/tests/newdata/test_data.py b/tests/newdata/test_data.py index 38b1b0ce..fb5f0cba 100644 --- a/tests/newdata/test_data.py +++ b/tests/newdata/test_data.py @@ -1,292 +1,287 @@ from copy import deepcopy -from typing import Any, List +from typing import Any, Dict, List import numpy as np import pandas as pd import pytest +import xarray as xr -from f3dasm._src.experimentdata._columns import _Columns -from f3dasm._src.experimentdata._newdata import _Data, _Index -from f3dasm.design import Domain +from f3dasm._src.experimentdata._experimental._newdata2 import ( + _convert_dict_to_data, _Data, _data_factory) pytestmark = pytest.mark.smoke -DataType = List[List[Any]] +DataType = Dict[int, Dict[str, Any]] +# Initialization +# ============================================================================= -def 
test_init(list_1: DataType): - data = _Data(list_1) - assert data.data == list_1 - assert data.columns.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1, 2])) +def test_init(): + data = _Data({0: {"a": 1, "b": 2}}) + assert len(data) == 1 + assert not data.is_empty() + assert data.data == {0: {"a": 1, "b": 2}} -def test_init_with_columns(list_1: DataType, columns_1: _Columns): - data = _Data(list_1, columns_1) - assert data.data == list_1 - assert data.names == ['a', 'b', 'c'] +def test_init_empty(): + data = _Data() + assert len(data) == 0 + assert data.is_empty() -def test_init_with_columns_and_indices( - list_1: DataType, columns_1: _Columns, indices_1: _Index): - data = _Data(list_1, columns_1, indices_1) - assert data.data == list_1 - assert data.names == ['a', 'b', 'c'] - assert data.indices.equals(pd.Index([3, 5, 6])) +def test_init_with_data(): + input_data = {0: {"a": 1, "b": 2}} + data = _Data(input_data) + assert len(data) == 1 + assert not data.is_empty() + assert data.data == input_data -def test__len__(list_1: DataType): - data = _Data(list_1) - assert len(data) == 3 +def test_from_numpy(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = _Data.from_numpy(array) + expected_data = {0: {0: 1, 1: 2, 2: 3}, 1: {0: 4, 1: 5, 2: 6}} + assert data.data == expected_data -def test__iter__(list_1: DataType): - data = _Data(list_1) - for i, row in enumerate(data): - assert row == list_1[i] +def test_from_numpy_with_keys(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = _Data.from_numpy(array, keys=["a", "b", "c"]) + expected_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + assert data.data == expected_data -def test__getitem__(list_1: DataType): - data = _Data(list_1) - assert data[0].data[0] == list_1[0] - assert data[1].data[0] == list_1[1] - assert data[2].data[0] == list_1[2] +def test_from_dataframe(): + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + data = _Data.from_dataframe(df) + expected_data = {0: {"a": 1, "b": 3}, 1: {"a": 2, "b": 4}} + assert data.data == expected_data -def test__getitem__list(list_1: DataType): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert data[[3, 45]].data == data.data +def test_from_indices(): + data = _Data.from_indices([0, 1]) + assert data.data == {0: {}, 1: {}} -def test__add__(list_1: DataType, list_3: DataType): - data_1 = _Data(list_1) - data_2 = _Data(list_3) - data_3 = data_1 + data_2 - assert data_3.data == list_1 + list_3 - assert data_3.columns.names == [0, 1, 2] +# Exporting +# ============================================================================= -def test__add__empty(list_3: DataType): - data_1 = _Data(columns=_Columns({0: None, 1: None, 2: None})) - data_2 = _Data(list_3) - data_3 = data_1 + data_2 - assert data_3.data == list_3 - assert data_3.columns.names == [0, 1, 2] +def test_to_numpy(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + np_array = data.to_numpy() + expected_array = np.array([[1, 2], [3, 4]]) + np.testing.assert_array_equal(np_array, expected_array) -def test__eq__(list_1: DataType): - data_1 = _Data(list_1) - data_2 = _Data(list_1) - assert data_1 == data_2 +def test_to_dataframe(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + df = data.to_dataframe() + expected_df = pd.DataFrame({"a": [1, 3], "b": [2, 4]}) + pd.testing.assert_frame_equal(df, expected_df) -def test_repr_html(list_1: DataType): - data = 
_Data(list_1) - assert data._repr_html_() == data.to_dataframe()._repr_html_() +def test_to_xarray(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + xarray = data.to_xarray('test') + expected_xarray = xr.DataArray( + [[1, 2], [3, 4]], dims=["iterations", "test"], + coords={"iterations": [0, 1], "test": ["a", "b"]}) + xr.testing.assert_equal(xarray, expected_xarray) -# Properties -# ============================================================================= +def test_get_data_dict(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + assert data.get_data_dict(0) == {"a": 1, "b": 2} -def test_names(list_1: DataType, columns_1: _Columns): - data = _Data(list_1, columns=columns_1) - assert data.names == ['a', 'b', 'c'] +def test_convert_dict_to_data(): + dictionary = {"a": 1, "b": 2} + data = _convert_dict_to_data(dictionary) + expected_data = _Data({0: {"a": 1, "b": 2}}) + assert data == expected_data -def test_names_default(list_1: DataType): - data = _Data(list_1) - assert data.names == [0, 1, 2] +# Properties +# ============================================================================= -def test_indices(list_1: DataType, indices_1: _Index): - data = _Data(list_1, index=indices_1) - assert data.indices.equals(pd.Index([3, 5, 6])) +def test_len(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert len(data) == 2 -def test_indices_default(list_1: DataType): - data = _Data(list_1) - assert data.indices.equals(pd.Index([0, 1, 2])) +def test_indices(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data.indices == [0, 1] -# Alternative constructors -# ============================================================================= - -def test_from_indices(): - data = _Data.from_indices(pd.Index([0, 1])) - assert data.indices.equals(pd.Index(([0, 1]))) - assert not data.names - assert data.is_empty() +def test_names(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data.names == ["a"] -def test_from_domain(domain: Domain): - data = _Data.from_domain(domain) - assert data.indices.equals(pd.Index([])) - assert data.names == ['a', 'b', 'c', 'd', 'e'] +def test_is_empty(): + data = _Data() assert data.is_empty() + data = _Data({0: {"a": 1}}) + assert not data.is_empty() -def test_from_numpy(): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - assert data.data == [[1, 2, 3], [4, 5, 6]] - assert data.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1])) +def test_getitem(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data[0] == _Data({0: {"a": 1}}) + assert data[1] == _Data({1: {"a": 2}}) + assert data[[0, 1]] == data -def test_from_dataframe(): - data = _Data.from_dataframe(pd.DataFrame([[1, 2, 3], [4, 5, 6]])) - assert data.data == [[1, 2, 3], [4, 5, 6]] - assert data.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1])) +def test_repr(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert isinstance(data.__repr__(), str) -def test_reset(): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - data.reset() - assert data.data == [] - assert not data.names - assert data.indices.equals(pd.Index([])) +def test_repr_html(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert isinstance(data._repr_html_(), str) +# Selecting and combining +# ============================================================================= -def test_reset_with_domain(domain: Domain): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - data.reset(domain) - assert data.data == [] - 
assert data.names == domain.names - assert data.indices.equals(pd.Index([])) +def test_join(): + data1 = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data2 = _Data({0: {"c": 5, "d": 6}, 1: {"c": 7, "d": 8}}) + data3 = data1.join(data2) + expected_data = {0: {"a": 1, "b": 2, "c": 5, "d": 6}, + 1: {"a": 3, "b": 4, "c": 7, "d": 8}} + assert data3 == _Data(expected_data) -# Export -# ============================================================================= +def test_select_columns(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + selected_data = data.select_columns(["a", "c"]) + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert selected_data.data == expected_data -def test_to_numpy(list_1: DataType): - data = _Data(list_1) - data.to_numpy() +def test_select_columns_single(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + selected_data = data.select_columns("a") + expected_data = {0: {"a": 1}, 1: {"a": 4}} + assert selected_data.data == expected_data -def to_dataframe(list_1: DataType): - data = _Data(list_1) - data.to_dataframe() - assert data.to_dataframe().equals(pd.DataFrame(list_1)) +def test_drop(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + data.drop(["b"]) + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert data.data == expected_data -def test_select_columns(list_1: DataType, columns_1: _Columns): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=columns_1) - new_data = data.select_columns(['a', 'c']) - assert new_data.names == ['a', 'c'] - assert new_data.data == [[1, 3], [4, 6]] +def test_drop_single_key(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + data.drop("b") + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert data.data == expected_data -def test_select_column(list_1: DataType, columns_1: _Columns): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=columns_1) - new_data = data.select_columns('a') - assert new_data.names == ['a'] - assert new_data.data == [[1], [4]] +# Modifying +# ============================================================================= -def test_add(list_2: DataType, list_3: DataType): - data_0 = _Data(deepcopy(list_2)) - data_1 = _Data(deepcopy(list_2)) - data_2 = _Data(list_3) - data_1.add(data_2.to_dataframe()) - assert data_1 == (data_0 + data_2) +def test_add(): + data1 = _Data({0: {"a": 1, "b": 2}}) + data2 = _Data({0: {"a": 3, "b": 4}}) + data3 = data1 + data2 + expected_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + assert data3.data == expected_data -def test_add_empty_rows(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.add_empty_rows(2) - assert data.data == [[1, 2, 3], [4, 5, 6], [ - np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]] +def test_add_empty(): + data1 = _Data() + data2 = _Data({0: {"a": 3, "b": 4}}) + data3 = data1 + data2 + assert data3.data == {0: {"a": 3, "b": 4}} def test_add_column(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.add_column('a') - assert data.data == [[1, 2, 3, np.nan], [4, 5, 6, np.nan]] - assert data.names == [0, 1, 2, 'a'] + missing_value = np.nan + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + data.add_column("b") + expected_data = {0: {"a": 1, "b": missing_value}, + 1: {"a": 2, "b": missing_value}} + assert data.data == expected_data -def test_remove(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - 
data.remove(0) - assert data.data == [[4, 5, 6]] - assert data.names == [0, 1, 2] - - -def test_remove_list(): - data = _Data(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - data.remove([0, 2]) - assert data.data == [[4, 5, 6]] - assert data.names == [0, 1, 2] +def test_overwrite(): + data = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data2 = _Data({0: {"a": 5, "b": 6}, 1: {"a": 7, "b": 8}}) + data.overwrite([0], data2) + assert data.data == {0: {"a": 5, "b": 6}, 1: {"a": 3, "b": 4}} -def test_get_data_dict(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - assert data.get_data_dict(0) == {0: 1, 1: 2, 2: 3} +def test_remove(): + data = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data.remove([1]) + assert data.data == {0: {"a": 1, "b": 2}} -def test_set_data_all_columns(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.set_data(index=0, value=[4, 5, 6]) - assert data.data == [[4, 5, 6], [4, 5, 6]] +def test_n_best_samples(): + df = pd.DataFrame({"a": [3, 1, 2], "b": [6, 4, 5]}) + data = _Data.from_dataframe(df) + best_samples = data.n_best_samples(2, "a") + expected_df = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=[1, 2]) + pd.testing.assert_frame_equal(best_samples, expected_df) def test_set_data(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) - data.set_data(index=0, value=99, column='b') - assert data.data == [[1, 99, 3], [4, 5, 6]] + data = _Data({0: {"a": 1}}) + data.set_data(0, 2, "a") + assert data.data[0]["a"] == 2 -def test_set_data_no_valid_index(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) - with pytest.raises(IndexError): - data.set_data(index=2, value=99, column='b') +def test_reset_index(): + data = _Data({1: {"a": 1}, 3: {"a": 2}}) + data.reset_index() + expected_data = {0: {"a": 1}, 1: {"a": 2}} + assert data.data == expected_data -def test_set_data_unknown_column(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) +def test_data_factory_pandas(): + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + data = _data_factory(df) + expected_data = _Data.from_dataframe(df) + assert data == expected_data - data.set_data(index=0, value=99, column='d') - assert data.names == ['a', 'b', 'c', 'd'] - assert data.data == [[1, 2, 3, 99], [4, 5, 6, np.nan]] +def test_data_factory_numpy(): + np_array = np.array([[1, 2], [3, 4]]) + data = _data_factory(np_array) + expected_data = _Data.from_numpy(np_array) + assert data == expected_data -def test_reset_index(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - data.reset_index() - assert data.indices.equals(pd.Index([0, 1])) +def test_data_factory_none(): + data = _data_factory(None) + expected_data = _Data() + assert data == expected_data -def test_is_empty(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert not data.is_empty() - data.reset() - assert data.is_empty() +def test_data_factory_unrecognized_datatype(): + with pytest.raises(TypeError): + _ = _data_factory(0) -def test_has_columnnames(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert not data.has_columnnames('d') - assert data.has_columnnames('c') - data.add_column('d') - assert data.has_columnnames('d') - -def test_set_columnnames(): - data = _Data(data=[[1, 2, 3], [4, 
5, 6]], columns=_Columns(
-        {'a': None, 'b': None, 'c': None}), index=_Index([3, 45]))
-    data.set_columnnames(['d', 'f', 'g'])
-    assert data.names == ['d', 'f', 'g']
+def test_data_factory_data_object():
+    data = _data_factory(_Data({0: {"a": 1}}))
+    expected_data = _Data({0: {"a": 1}})
+    assert data == expected_data
 
 
 if __name__ == "__main__":  # pragma: no cover
     pytest.main()
-
-    # return [[np.array([0.3, 5.0, 0.34]), 'd', 3], [np.array(
-    #     [0.23, 5.0, 0.0]), 'f', 4], [np.array([0.3, 5.0, 0.2]), 'c', 0]]

From 6d9938157b7522b7eb7b0dd801dc273acb127be2 Mon Sep 17 00:00:00 2001
From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com>
Date: Tue, 25 Jun 2024 11:26:30 +0200
Subject: [PATCH 10/17] Fix data indexing issue and add column renaming
 functionality

---
 .../_experimental/_jobqueue2.py               |   4 +-
 .../experimentdata/_experimental/_newdata2.py |  30 +-
 .../_experimental/_newexperimentdata2.py      |  20 +-
 tests/newdata/experimentdata/__init__.py      |   0
 tests/newdata/experimentdata/conftest.py      | 130 +++
 .../newdata/experimentdata/test__jobqueue.py  |  43 +
 .../experimentdata/test_experimentdata.py     | 737 ++++++++++++++++++
 tests/newdata/test_data.py                    |  11 +-
 8 files changed, 956 insertions(+), 19 deletions(-)
 create mode 100644 tests/newdata/experimentdata/__init__.py
 create mode 100644 tests/newdata/experimentdata/conftest.py
 create mode 100644 tests/newdata/experimentdata/test__jobqueue.py
 create mode 100644 tests/newdata/experimentdata/test_experimentdata.py

diff --git a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py
index 3c88308b..82721ace 100644
--- a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py
+++ b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py
@@ -73,8 +73,8 @@ def __add__(self, __o: Index | str) -> Index:
 
         # Make a copy of other.jobs and modify its index
         other_jobs_copy = deepcopy(__o)
-        other_jobs_copy.jobs.index = range(
-            len(other_jobs_copy)) + self.jobs.index[-1] + 1
+        other_jobs_copy.jobs.index = pd.Index(range(
+            len(other_jobs_copy))) + self.jobs.index[-1] + 1
 
         return Index(pd.concat([self.jobs, other_jobs_copy.jobs]))
 
diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
index 4bff29cd..26df0982 100644
--- a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
+++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
@@ -148,8 +148,9 @@ def __repr__(self) -> str:
     # Properties
     # =============================================================================
 
+
     @property
-    def indices(self) -> List[int]:
+    def indices(self) -> pd.Index:
         """
         Get the indices of the data.
 
@@ -158,7 +159,7 @@
-        List[int]
-            The list of indices.
+        pd.Index
+            The indices of the data.
         """
-        return list(self.data.keys())
+        return pd.Index(list(self.data.keys()))
 
     @property
     def names(self) -> List[str]:
@@ -187,8 +188,9 @@ def is_empty(self) -> bool:
 
     # Initialization
     # =============================================================================
 
+
    @classmethod
-    def from_indices(cls, rows: Iterable[int]) -> _Data:
+    def from_indices(cls, rows: Iterable[int] | pd.Index) -> _Data:
        """
        Create a _Data object from a list of indices.
@@ -426,7 +428,7 @@ def n_best_samples(self, nosamples: int, key: str) -> pd.DataFrame:
         df = self.to_dataframe()
         return df.nsmallest(n=nosamples, columns=key)
 
-    def add_column(self, key: str):
+    def add_column(self, key: str, exist_ok: bool = True):
         """
         Add a new column to the data with missing values.
 
@@ -436,8 +438,25 @@
         The key for the new column.
+        exist_ok : bool, optional
+            If False, raise a KeyError when the column already exists.
         """
         for row in self.data:
+            if not exist_ok and key in self.data[row]:
+                raise KeyError(f"Key '{key}' already exists in the data.")
             self.data[row][key] = MISSING_VALUE
 
+    def rename_columns(self, mapping: Dict[str, str]):
+        """
+        Rename columns in the data.
+
+        Parameters
+        ----------
+        mapping : Dict[str, str]
+            The mapping of old to new column names.
+        """
+        for row in self.data:
+            for old_key, new_key in mapping.items():
+                self.data[row][new_key] = self.data[row].pop(old_key)
+
     def remove(self, rows: Iterable[int]):
         """
         Remove specific rows from the data.
@@ -513,7 +532,8 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data:
 # =============================================================================
 
 
-def _data_factory(data: DataTypes) -> _Data:
+def _data_factory(data: DataTypes,
+                  keys: Optional[Iterable[str]] = None) -> _Data:
 
     if data is None:
         return _Data()
@@ -527,7 +547,7 @@
         return _Data.from_file(Path(data))
 
     elif isinstance(data, np.ndarray):
-        return _Data.from_numpy(data)
+        return _Data.from_numpy(data, keys=keys)
 
     else:
         raise TypeError(
diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
index 0a2cc770..7851f30f 100644
--- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
+++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
@@ -110,8 +110,12 @@ def __init__(self,
 
         self.project_dir = _project_dir_factory(project_dir)
 
-        self._input_data = _data_factory(input_data)
-        self._output_data = _data_factory(output_data)
+        if isinstance(input_data, np.ndarray) and isinstance(domain, Domain):
+            self._input_data = _data_factory(input_data, domain.names)
+            self._output_data = _data_factory(output_data, domain.output_names)
+        else:
+            self._input_data = _data_factory(input_data)
+            self._output_data = _data_factory(output_data)
 
         # Create empty output_data from indices if output_data is empty
         if self._output_data.is_empty():
@@ -134,9 +138,8 @@ def __init__(self,
 
         # For backwards compatibility; if the output_data has
         # only one column, rename it to 'y'
-        # TODO: Fix this for newdata2
         if self._output_data.names == [0]:
-            self._output_data.columns.set_columnnames(['y'])
+            self._output_data.rename_columns({0: 'y'})
 
     def __len__(self):
         """The len() method returns the number of datapoints"""
@@ -944,7 +947,7 @@ def _set_experiment_sample(self,
             self._output_data.set_data(
                 row=experiment_sample.job_number,
                 value=value,
-                column=column)
+                key=column)
 
         self._jobs.mark(experiment_sample._jobnumber,
                        status=Status.FINISHED)
@@ -997,11 +1000,10 @@ def _set_error(self, index: int) -> None:
         index
             index of the experiment_sample to mark as error
         """
-        # self.jobs.mark_as_error(index)
         self._jobs.mark(index, status=Status.ERROR)
-        self._output_data.set_data(
-            index,
-            value=['ERROR' for _ in self._output_data.names])
+        for column in self._output_data.names:
+            self._output_data.set_data(
+                index, value='ERROR', key=column)
 
     @_access_file
     def _write_error(self, index: int):
diff --git a/tests/newdata/experimentdata/__init__.py
b/tests/newdata/experimentdata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/newdata/experimentdata/conftest.py b/tests/newdata/experimentdata/conftest.py new file mode 100644 index 00000000..68189f88 --- /dev/null +++ b/tests/newdata/experimentdata/conftest.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from f3dasm._src.design.parameter import (_CategoricalParameter, + _ContinuousParameter, + _DiscreteParameter) +from f3dasm._src.experimentdata._experimental._newexperimentdata2 import \ + ExperimentData +from f3dasm.design import Domain, make_nd_continuous_domain + +SEED = 42 + + +@pytest.fixture(scope="package") +def seed() -> int: + return SEED + + +@pytest.fixture(scope="package") +def domain() -> Domain: + + space = { + 'x1': _ContinuousParameter(-5.12, 5.12), + 'x2': _DiscreteParameter(-3, 3), + 'x3': _CategoricalParameter(["red", "green", "blue"]) + } + + return Domain(space=space) + + +@pytest.fixture(scope="package") +def domain_continuous() -> Domain: + return make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), dimensionality=3) + + +@pytest.fixture(scope="package") +def experimentdata(domain: Domain) -> ExperimentData: + e_data = ExperimentData(domain) + e_data.sample(sampler='random', n_samples=10, seed=SEED) + return e_data + + +@pytest.fixture(scope="package") +def experimentdata2(domain: Domain) -> ExperimentData: + return ExperimentData.from_sampling(sampler='random', domain=domain, n_samples=10, seed=SEED) + + +@pytest.fixture(scope="package") +def experimentdata_continuous(domain_continuous: Domain) -> ExperimentData: + return ExperimentData.from_sampling(sampler='random', domain=domain_continuous, n_samples=10, seed=SEED) + + +@pytest.fixture(scope="package") +def experimentdata_expected() -> ExperimentData: + domain_continuous = make_nd_continuous_domain( + bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), dimensionality=3) + data = ExperimentData.from_sampling( + sampler='random', domain=domain_continuous, n_samples=10, seed=SEED) + for es, output in zip(data, np.zeros((10, 1))): + es.store(name='y', object=float(output)) + data._set_experiment_sample(es) + data.add(input_data=np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), + output_data=np.array([[0.0], [0.0]]), domain=data.domain) + + # data._input_data.data = data._input_data.data.round(6) + return data + + +@pytest.fixture(scope="package") +def experimentdata_expected_no_output() -> ExperimentData: + domain_continuous = make_nd_continuous_domain( + bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), dimensionality=3) + data = ExperimentData.from_sampling( + sampler='random', domain=domain_continuous, n_samples=10, seed=SEED) + data.add(input_data=np.array( + [[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]), domain=domain_continuous) + + # data._input_data.data = data._input_data.data.round(6) + + return data + + +@pytest.fixture(scope="package") +def experimentdata_expected_only_domain() -> ExperimentData: + domain_continuous = make_nd_continuous_domain( + bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), dimensionality=3) + return ExperimentData(domain=domain_continuous) + + +@pytest.fixture(scope="package") +def numpy_array(domain_continuous: Domain) -> np.ndarray: + rng = np.random.default_rng(SEED) + return rng.random((10, len(domain_continuous))) + + +@pytest.fixture(scope="package") +def numpy_output_array(domain_continuous: Domain) -> np.ndarray: + return np.zeros((10, 1)) + + 
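# A note on these fixtures (illustrative sketch, not part of the patch):
# every random fixture in this conftest draws from np.random.default_rng(SEED)
# with the same package-scoped seed, so the sampled ExperimentData, the numpy
# array above and the xarray/pandas fixtures below are expected to describe
# the same 10-row sample, which is what lets the exporter tests compare the
# representations directly. The reproducibility property this relies on:
#
#     import numpy as np
#     a = np.random.default_rng(42).random((10, 3))
#     b = np.random.default_rng(42).random((10, 3))
#     assert np.array_equal(a, b)  # a fixed seed reproduces the same draws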
+@pytest.fixture(scope="package") +def xarray_dataset(domain_continuous: Domain) -> xr.Dataset: + rng = np.random.default_rng(SEED) + # np.random.seed(SEED) + input_data = rng.random((10, len(domain_continuous))) + input_names = domain_continuous.names + + output_data = pd.DataFrame() + output_names = output_data.columns.to_list() + + return xr.Dataset({'input': xr.DataArray(input_data, dims=['iterations', 'input_dim'], coords={ + 'iterations': range(len(input_data)), 'input_dim': input_names}), + 'output': xr.DataArray(output_data, dims=['iterations', 'output_dim'], coords={ + 'iterations': range(len(output_data)), 'output_dim': output_names})}) + + +@pytest.fixture(scope="package") +def pandas_dataframe(domain_continuous: Domain) -> pd.DataFrame: + # np.random.seed(SEED) + rng = np.random.default_rng(SEED) + return pd.DataFrame(rng.random((10, len(domain_continuous))), columns=domain_continuous.names) + + +@pytest.fixture(scope="package") +def continuous_parameter() -> _ContinuousParameter: + return _ContinuousParameter(lower_bound=0., upper_bound=1.) diff --git a/tests/newdata/experimentdata/test__jobqueue.py b/tests/newdata/experimentdata/test__jobqueue.py new file mode 100644 index 00000000..52010733 --- /dev/null +++ b/tests/newdata/experimentdata/test__jobqueue.py @@ -0,0 +1,43 @@ +import pandas as pd + +from f3dasm._src.experimentdata._experimental._jobqueue2 import \ + Index as _JobQueue + +# from f3dasm._src.experimentdata._jobqueue import _JobQueue + + +def test_select_all_with_matching_status(): + # Create a job queue with some jobs + job_queue = _JobQueue() + job_queue.jobs = pd.Series( + ['in progress', 'running', 'completed', 'in progress', 'failed']) + + # Select all jobs with status 'in progress' + selected_jobs = job_queue.select_all('in progress') + + # Check if the selected jobs match the expected result + assert (selected_jobs.jobs == ['in progress', 'in progress']).all() + + +def test_select_all_with_no_matching_status(): + # Create a job queue with some jobs + job_queue = _JobQueue() + job_queue.jobs = pd.Series( + ['in progress', 'running', 'completed', 'in progress', 'failed']) + + # Select all jobs with status 'cancelled' + selected_jobs = job_queue.select_all('cancelled') + + # Check if the selected jobs match the expected result + assert selected_jobs.jobs.empty + + +def test_select_all_with_empty_job_queue(): + # Create an empty job queue + job_queue = _JobQueue() + + # Select all jobs with status 'in progress' + selected_jobs = job_queue.select_all('in progress') + + # Check if the selected jobs match the expected result + assert selected_jobs.jobs.empty diff --git a/tests/newdata/experimentdata/test_experimentdata.py b/tests/newdata/experimentdata/test_experimentdata.py new file mode 100644 index 00000000..026945e6 --- /dev/null +++ b/tests/newdata/experimentdata/test_experimentdata.py @@ -0,0 +1,737 @@ +from __future__ import annotations + +import csv +import pickle +from pathlib import Path +from typing import Iterable + +import numpy as np +import pandas as pd +import pytest +import xarray as xr + +from f3dasm import ExperimentSample +from f3dasm._src.design.parameter import _ContinuousParameter +from f3dasm._src.experimentdata._experimental._jobqueue2 import \ + Index as _JobQueue +from f3dasm._src.experimentdata._experimental._newdata2 import DataTypes, _Data +from f3dasm._src.experimentdata._experimental._newexperimentdata2 import \ + ExperimentData +from f3dasm.design import Domain, Status, make_nd_continuous_domain + +pytestmark = pytest.mark.smoke + 
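# A note on the module preamble (illustrative, not part of the patch):
# assigning `pytestmark` at module level applies the marker to every test in
# this file, so the whole suite can be selected or deselected in one go.
# Assuming the `smoke` marker is registered in the project's pytest
# configuration, typical invocations would be:
#
#     pytest tests/newdata/experimentdata -m smoke          # only these tests
#     pytest tests/newdata/experimentdata -m "not smoke"    # everything else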
+SEED = 42 + + +def test_check_experimentdata(experimentdata: ExperimentData): + assert isinstance(experimentdata, ExperimentData) + +# Write test functions + + +def test_experiment_data_init(experimentdata: ExperimentData, domain: Domain): + assert experimentdata.domain == domain + assert experimentdata.project_dir == Path.cwd() + # Add more assertions as needed + + +def test_experiment_data_add(experimentdata: ExperimentData, + experimentdata2: ExperimentData, domain: Domain): + experimentdata_total = ExperimentData(domain) + experimentdata_total.add_experiments(experimentdata) + experimentdata_total.add_experiments(experimentdata2) + assert experimentdata_total == experimentdata + experimentdata2 + + +def test_experiment_data_len_empty(domain: Domain): + experiment_data = ExperimentData(domain) + assert len(experiment_data) == 0 # Update with the expected length + + +def test_experiment_data_len_equals_input_data(experimentdata: ExperimentData): + assert len(experimentdata) == len(experimentdata._input_data) + + +@pytest.mark.parametrize("slice_type", [3, [0, 1, 3]]) +def test_experiment_data_select(slice_type: int | Iterable[int], experimentdata: ExperimentData): + input_data = experimentdata._input_data[slice_type] + output_data = experimentdata._output_data[slice_type] + jobs = experimentdata._jobs[slice_type] + constructed_experimentdata = ExperimentData( + input_data=input_data, output_data=output_data, jobs=jobs, domain=experimentdata.domain) + assert constructed_experimentdata == experimentdata.select(slice_type) + +# Constructors +# ====================================================================================== + + +def test_from_file(experimentdata_continuous: ExperimentData, seed: int, tmp_path: Path): + # experimentdata_continuous.filename = tmp_path / 'test001' + experimentdata_continuous.store(tmp_path / 'experimentdata') + + experimentdata_from_file = ExperimentData.from_file( + tmp_path / 'experimentdata') + + # Check if the input_data attribute of ExperimentData matches the expected_data + pd.testing.assert_frame_equal( + experimentdata_continuous._input_data.to_dataframe(), experimentdata_from_file._input_data.to_dataframe(), check_dtype=False, atol=1e-6) + pd.testing.assert_frame_equal(experimentdata_continuous._output_data.to_dataframe(), + experimentdata_from_file._output_data.to_dataframe()) + pd.testing.assert_series_equal( + experimentdata_continuous._jobs.jobs, experimentdata_from_file._jobs.jobs) + # assert experimentdata_continuous.input_data == experimentdata_from_file.input_data + assert experimentdata_continuous._output_data == experimentdata_from_file._output_data + assert experimentdata_continuous.domain == experimentdata_from_file.domain + assert experimentdata_continuous._jobs == experimentdata_from_file._jobs + + +def test_from_file_wrong_name(experimentdata_continuous: ExperimentData, seed: int, tmp_path: Path): + experimentdata_continuous.filename = tmp_path / 'test001' + experimentdata_continuous.store() + + with pytest.raises(FileNotFoundError): + _ = ExperimentData.from_file(tmp_path / 'experimentdata') + + +def test_from_sampling(experimentdata_continuous: ExperimentData, seed: int): + # sampler = RandomUniform(domain=experimentdata_continuous.domain, number_of_samples=10, seed=seed) + experimentdata_from_sampling = ExperimentData.from_sampling(sampler='random', + domain=experimentdata_continuous.domain, + n_samples=10, seed=seed) + assert experimentdata_from_sampling == experimentdata_continuous + + +@pytest.fixture +def 
sample_csv_inputdata(tmp_path): + # Create sample CSV files for testing + input_csv_file = tmp_path / 'experimentdata_data.csv' + + # Create sample input and output dataframes + input_data = pd.DataFrame( + {'input_col1': [1, 2, 3], 'input_col2': [4, 5, 6]}) + + return input_csv_file, input_data + + +@pytest.fixture +def sample_csv_outputdata(tmp_path): + # Create sample CSV files for testing + output_csv_file = tmp_path / 'experimentdata_output.csv' + + # Create sample input and output dataframes + output_data = pd.DataFrame( + {'output_col1': [7, 8, 9], 'output_col2': [10, 11, 12]}) + + return output_csv_file, output_data + + +def test_from_object(experimentdata_continuous: ExperimentData): + input_data = experimentdata_continuous._input_data + output_data = experimentdata_continuous._output_data + jobs = experimentdata_continuous._jobs + domain = experimentdata_continuous.domain + experiment_data = ExperimentData( + input_data=input_data, output_data=output_data, jobs=jobs, domain=domain) + assert experiment_data == ExperimentData( + input_data=input_data, output_data=output_data, jobs=jobs, domain=domain) + assert experiment_data == experimentdata_continuous + +# Exporters +# ====================================================================================== + + +def test_to_numpy(experimentdata_continuous: ExperimentData, numpy_array: np.ndarray): + x, y = experimentdata_continuous.to_numpy() + + # cast x to floats + x = x.astype(float) + # assert if x and numpy_array have all the same values + assert np.allclose(x, numpy_array) + + +def test_to_xarray(experimentdata_continuous: ExperimentData, xarray_dataset: xr.Dataset): + exported_dataset = experimentdata_continuous.to_xarray() + # assert if xr_dataset is equal to xarray + assert exported_dataset.equals(xarray_dataset) + + +def test_to_pandas(experimentdata_continuous: ExperimentData, pandas_dataframe: pd.DataFrame): + exported_dataframe, _ = experimentdata_continuous.to_pandas() + # assert if pandas_dataframe is equal to exported_dataframe + pd.testing.assert_frame_equal( + exported_dataframe, pandas_dataframe, atol=1e-6, check_dtype=False) +# Exporters +# ====================================================================================== + + +def test_add_new_input_column(experimentdata: ExperimentData, + continuous_parameter: _ContinuousParameter): + kwargs = {'low': continuous_parameter.lower_bound, + 'high': continuous_parameter.upper_bound} + experimentdata.add_input_parameter( + name='test', type='float', **kwargs) + assert 'test' in experimentdata._input_data.names + + +def test_add_new_output_column(experimentdata: ExperimentData): + experimentdata.add_output_parameter(name='test', is_disk=False) + assert 'test' in experimentdata._output_data.names + + +def test_set_error(experimentdata_continuous: ExperimentData): + experimentdata_continuous._set_error(3) + assert experimentdata_continuous._jobs.jobs[3] == Status.ERROR + + +# Helper function to create a temporary CSV file with sample data +def create_sample_csv_input(file_path): + data = [ + ["x0", "x1", "x2"], + [0.77395605, 0.43887844, 0.85859792], + [0.69736803, 0.09417735, 0.97562235], + [0.7611397, 0.78606431, 0.12811363], + [0.45038594, 0.37079802, 0.92676499], + [0.64386512, 0.82276161, 0.4434142], + [0.22723872, 0.55458479, 0.06381726], + [0.82763117, 0.6316644, 0.75808774], + [0.35452597, 0.97069802, 0.89312112], + [0.7783835, 0.19463871, 0.466721], + [0.04380377, 0.15428949, 0.68304895], + [0.000000, 0.000000, 0.000000], + [1.000000, 1.000000, 1.000000], + 
] + with open(file_path, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerows(data) + + +def create_sample_csv_output(file_path): + data = [ + ["y"], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + + ] + with open(file_path, mode='w', newline='') as file: + writer = csv.writer(file) + writer.writerows(data) + +# Pytest fixture to create a temporary CSV file + + +def create_domain_pickle(filepath): + domain = make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3) + domain.store(filepath) + + +def create_jobs_pickle_finished(filepath): + domain = make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3) + + _data_input = _Data.from_dataframe(pd_input()) + _data_output = _Data.from_dataframe(pd_output()) + experimentdata = ExperimentData( + domain=domain, input_data=_data_input, output_data=_data_output) + experimentdata._jobs.store(filepath) + + +def create_jobs_pickle_open(filepath): + domain = make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3) + + _data_input = _Data.from_dataframe(pd_input()) + experimentdata = ExperimentData(domain=domain, input_data=_data_input) + experimentdata._jobs.store(filepath) + + +def path_domain(tmp_path): + domain_file_path = tmp_path / "test_domain.pkl" + create_domain_pickle(domain_file_path) + return domain_file_path + + +def str_domain(tmp_path): + domain_file_path = tmp_path / "test_domain.pkl" + create_domain_pickle(domain_file_path) + return str(domain_file_path) + + +def path_jobs_finished(tmp_path): + jobs_file_path = tmp_path / "test_jobs.pkl" + create_jobs_pickle_finished(jobs_file_path) + return jobs_file_path + + +def str_jobs_finished(tmp_path): + jobs_file_path = tmp_path / "test_jobs.pkl" + create_jobs_pickle_finished(jobs_file_path) + return str(jobs_file_path) + + +def path_jobs_open(tmp_path): + jobs_file_path = tmp_path / "test_jobs.pkl" + create_jobs_pickle_open(jobs_file_path) + return jobs_file_path + + +def str_jobs_open(tmp_path): + jobs_file_path = tmp_path / "test_jobs.pkl" + create_jobs_pickle_open(jobs_file_path) + return str(jobs_file_path) + + +def path_input(tmp_path): + csv_file_path = tmp_path / "test_input.csv" + create_sample_csv_input(csv_file_path) + return csv_file_path + + +def str_input(tmp_path): + csv_file_path = tmp_path / "test_input.csv" + create_sample_csv_input(csv_file_path) + return str(csv_file_path) + + +def path_output(tmp_path: Path): + csv_file_path = tmp_path / "test_output.csv" + create_sample_csv_output(csv_file_path) + return csv_file_path + + +def str_output(tmp_path: Path): + csv_file_path = tmp_path / "test_output.csv" + create_sample_csv_output(csv_file_path) + return str(csv_file_path) + +# Pytest test function for reading and monkeypatching a CSV file + + +def numpy_input(): + return np.array([ + [0.77395605, 0.43887844, 0.85859792], + [0.69736803, 0.09417735, 0.97562235], + [0.7611397, 0.78606431, 0.12811363], + [0.45038594, 0.37079802, 0.92676499], + [0.64386512, 0.82276161, 0.4434142], + [0.22723872, 0.55458479, 0.06381726], + [0.82763117, 0.6316644, 0.75808774], + [0.35452597, 0.97069802, 0.89312112], + [0.7783835, 0.19463871, 0.466721], + [0.04380377, 0.15428949, 0.68304895], + [0.000000, 0.000000, 0.000000], + [1.000000, 1.000000, 1.000000], + ]) + + +def numpy_output(): + return np.array([ + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + 
[0.0], + [0.0], + + ]) + + +def pd_input(): + return pd.DataFrame([ + [0.77395605, 0.43887844, 0.85859792], + [0.69736803, 0.09417735, 0.97562235], + [0.7611397, 0.78606431, 0.12811363], + [0.45038594, 0.37079802, 0.92676499], + [0.64386512, 0.82276161, 0.4434142], + [0.22723872, 0.55458479, 0.06381726], + [0.82763117, 0.6316644, 0.75808774], + [0.35452597, 0.97069802, 0.89312112], + [0.7783835, 0.19463871, 0.466721], + [0.04380377, 0.15428949, 0.68304895], + [0.000000, 0.000000, 0.000000], + [1.000000, 1.000000, 1.000000], + ], columns=["x0", "x1", "x2"]) + + +def pd_output(): + return pd.DataFrame([ + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + [0.0], + + ], columns=["y"]) + + +def data_input(): + return _Data.from_dataframe(pd_input()) + + +def data_output(): + return _Data.from_dataframe(pd_output()) + + +@pytest.mark.parametrize("input_data", [path_input, str_input, pd_input(), data_input(), numpy_input()]) +@pytest.mark.parametrize("output_data", [path_output, str_output, pd_output(), data_output()]) +@pytest.mark.parametrize("domain", [make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3), None, path_domain, str_domain]) +@pytest.mark.parametrize("jobs", [None, path_jobs_finished, str_jobs_finished]) +def test_init_with_output(input_data: DataTypes, output_data: DataTypes, domain: Domain | str | Path | None, + jobs: _JobQueue | str | Path | None, + experimentdata_expected: ExperimentData, monkeypatch, tmp_path: Path): + + # if input_data is Callable + if callable(input_data): + input_data = input_data(tmp_path) + expected_data_input = pd.read_csv(input_data) + + # if output_data is Callable + if callable(output_data): + output_data = output_data(tmp_path) + expected_data_output = pd.read_csv(output_data) + + if callable(domain): + domain = domain(tmp_path) + expected_domain = Domain.from_file(domain) + + if callable(jobs): + jobs = jobs(tmp_path) + expected_jobs = _JobQueue.from_file(jobs).jobs + + # monkeypatch pd.read_csv to return the expected_data DataFrame + def mock_read_csv(*args, **kwargs): + + path = args[0] + if isinstance(args[0], str): + path = Path(path) + + if path == tmp_path / "test_input.csv": + return expected_data_input + + elif path == tmp_path / "test_output.csv": + return expected_data_output + + else: + raise ValueError("Unexpected file path") + + def mock_load_pickle(*args, **kwargs): + return expected_domain + + def mock_pd_read_pickle(*args, **kwargs): + path = args[0] + + if isinstance(path, str): + path = Path(path) + + if path == tmp_path / "test_jobs.pkl": + return expected_jobs + + else: + raise ValueError("Unexpected jobs file path") + + monkeypatch.setattr(pd, "read_csv", mock_read_csv) + monkeypatch.setattr(pickle, "load", mock_load_pickle) + monkeypatch.setattr(pd, "read_pickle", mock_pd_read_pickle) + + if isinstance(input_data, np.ndarray) and domain is None: + with pytest.raises(ValueError): + ExperimentData(domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs) + return + # Initialize ExperimentData with the CSV file + experiment_data = ExperimentData(domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs) + + # Check if the input_data attribute of ExperimentData matches the expected_data + pd.testing.assert_frame_equal( + experiment_data._input_data.to_dataframe(), experimentdata_expected._input_data.to_dataframe(), check_dtype=False, atol=1e-6) + 
pd.testing.assert_frame_equal(experiment_data._output_data.to_dataframe(), + experimentdata_expected._output_data.to_dataframe(), check_dtype=False) + + +@pytest.mark.parametrize("input_data", [pd_input(), path_input, str_input, data_input(), numpy_input()]) +@pytest.mark.parametrize("output_data", [None]) +@pytest.mark.parametrize("domain", [make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3), None, path_domain, str_domain]) +@pytest.mark.parametrize("jobs", [None, path_jobs_open, str_jobs_open]) +def test_init_without_output(input_data: DataTypes, output_data: DataTypes, domain: Domain, jobs: _JobQueue, + experimentdata_expected_no_output: ExperimentData, monkeypatch, tmp_path): + + # if input_data is Callable + if callable(input_data): + input_data = input_data(tmp_path) + expected_data_input = pd.read_csv(input_data) + + # if output_data is Callable + if callable(output_data): + output_data = output_data(tmp_path) + expected_data_output = pd.read_csv(output_data) + + if callable(domain): + domain = domain(tmp_path) + expected_domain = Domain.from_file(domain) + + if callable(jobs): + jobs = jobs(tmp_path) + expected_jobs = _JobQueue.from_file(jobs).jobs + + # monkeypatch pd.read_csv to return the expected_data DataFrame + def mock_read_csv(*args, **kwargs): + + path = args[0] + if isinstance(args[0], str): + path = Path(path) + + if path == tmp_path / "test_input.csv": + return expected_data_input + + elif path == tmp_path / "test_output.csv": + return expected_data_output + + else: + raise ValueError("Unexpected file path") + + def mock_load_pickle(*args, **kwargs): + return expected_domain + + def mock_pd_read_pickle(*args, **kwargs): + path = args[0] + + if isinstance(path, str): + path = Path(path) + + if path == tmp_path / "test_jobs.pkl": + return expected_jobs + + monkeypatch.setattr(pd, "read_csv", mock_read_csv) + monkeypatch.setattr(pickle, "load", mock_load_pickle) + monkeypatch.setattr(pd, "read_pickle", mock_pd_read_pickle) + + if isinstance(input_data, np.ndarray) and domain is None: + with pytest.raises(ValueError): + ExperimentData(domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs) + return + + # Initialize ExperimentData with the CSV file + experiment_data = ExperimentData(domain=domain, input_data=input_data, + output_data=output_data, jobs=jobs) + + # Check if the input_data attribute of ExperimentData matches the expected_data + pd.testing.assert_frame_equal( + experiment_data._input_data.to_dataframe(), experimentdata_expected_no_output._input_data.to_dataframe(), atol=1e-6, check_dtype=False) + pd.testing.assert_frame_equal(experiment_data._output_data.to_dataframe(), + experimentdata_expected_no_output._output_data.to_dataframe()) + pd.testing.assert_series_equal( + experiment_data._jobs.jobs, experimentdata_expected_no_output._jobs.jobs) + # assert experiment_data.domain == experimentdata_expected_no_output.domain + assert experiment_data._jobs == experimentdata_expected_no_output._jobs + + +@pytest.mark.parametrize("input_data", [None]) +@pytest.mark.parametrize("output_data", [None]) +@pytest.mark.parametrize("domain", [make_nd_continuous_domain(bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), + dimensionality=3), path_domain, str_domain]) +def test_init_only_domain(input_data: DataTypes, output_data: DataTypes, domain: Domain | str | Path, + experimentdata_expected_only_domain: ExperimentData, + monkeypatch, tmp_path): + + # if input_data is Callable + if callable(input_data): + input_data = 
input_data(tmp_path) + expected_data_input = pd.read_csv(input_data) + + # if output_data is Callable + if callable(output_data): + output_data = output_data(tmp_path) + expected_data_output = pd.read_csv(output_data) + + if callable(domain): + domain = domain(tmp_path) + expected_domain = Domain.from_file(domain) + + # monkeypatch pd.read_csv to return the expected_data DataFrame + def mock_read_csv(*args, **kwargs): + + path = args[0] + if isinstance(args[0], str): + path = Path(path) + + if path == tmp_path / "test_input.csv": + return expected_data_input + + elif path == tmp_path / "test_output.csv": + return expected_data_output + + else: + raise ValueError("Unexpected file path") + + def mock_load_pickle(*args, **kwargs): + return expected_domain + + monkeypatch.setattr(pd, "read_csv", mock_read_csv) + monkeypatch.setattr(pickle, "load", mock_load_pickle) + + # Initialize ExperimentData with the CSV file + experiment_data = ExperimentData(domain=domain, input_data=input_data, + output_data=output_data) + + # Check if the input_data attribute of ExperimentData matches the expected_data + pd.testing.assert_frame_equal( + experiment_data._input_data.to_dataframe(), experimentdata_expected_only_domain._input_data.to_dataframe(), check_dtype=False) + pd.testing.assert_frame_equal(experiment_data._output_data.to_dataframe(), + experimentdata_expected_only_domain._output_data.to_dataframe(), check_dtype=False) + assert experiment_data._input_data == experimentdata_expected_only_domain._input_data + assert experiment_data._output_data == experimentdata_expected_only_domain._output_data + assert experiment_data.domain == experimentdata_expected_only_domain.domain + assert experiment_data._jobs == experimentdata_expected_only_domain._jobs + + assert experiment_data == experimentdata_expected_only_domain + + +@pytest.mark.parametrize("input_data", [[0.1, 0.2], {"a": 0.1, "b": 0.2}, 0.2, 2]) +def test_invalid_type(input_data): + with pytest.raises(TypeError): + ExperimentData(input_data=input_data) + + +def test_add_invalid_type(experimentdata: ExperimentData): + with pytest.raises(TypeError): + experimentdata + 1 + + +def test_add_two_different_domains(experimentdata: ExperimentData, experimentdata_continuous: ExperimentData): + with pytest.raises(ValueError): + experimentdata + experimentdata_continuous + + +def test_repr_html(experimentdata: ExperimentData, monkeypatch): + assert isinstance(experimentdata._repr_html_(), str) + + +def test_store(experimentdata: ExperimentData, tmp_path: Path): + experimentdata.store(tmp_path / "test") + assert (tmp_path / "test" / "experiment_data" / "input.csv").exists() + assert (tmp_path / "test" / "experiment_data" / "output.csv").exists() + assert (tmp_path / "test" / "experiment_data" / "domain.pkl").exists() + assert (tmp_path / "test" / "experiment_data" / "jobs.pkl").exists() + + +def test_store_give_no_filename(experimentdata: ExperimentData, tmp_path: Path): + experimentdata.set_project_dir(tmp_path / 'test2') + experimentdata.store() + assert (tmp_path / "test2" / "experiment_data" / "input.csv").exists() + assert (tmp_path / "test2" / "experiment_data" / "output.csv").exists() + assert (tmp_path / "test2" / "experiment_data" / "domain.pkl").exists() + assert (tmp_path / "test2" / "experiment_data" / "jobs.pkl").exists() + + +@pytest.mark.parametrize("mode", ["sequential", "parallel", "typo"]) +def test_evaluate_mode(mode: str, experimentdata_continuous: ExperimentData, tmp_path: Path): + experimentdata_continuous.filename = tmp_path / 'test009' + 
+ if mode == "typo": + with pytest.raises(ValueError): + experimentdata_continuous.evaluate("ackley", mode=mode, kwargs={ + "scale_bounds": np.array([[0., 1.], [0., 1.], [0., 1.]]), 'seed': SEED}) + else: + experimentdata_continuous.evaluate("ackley", mode=mode, kwargs={ + "scale_bounds": np.array([[0., 1.], [0., 1.], [0., 1.]]), 'seed': SEED}) + + +def test_get_input_data(experimentdata_expected_no_output: ExperimentData): + input_data = experimentdata_expected_no_output.get_input_data() + df, _ = input_data.to_pandas() + pd.testing.assert_frame_equal(df, pd_input(), check_dtype=False, atol=1e-6) + assert experimentdata_expected_no_output._input_data == input_data._input_data + + +@pytest.mark.parametrize("selection", ["x0", ["x0"], ["x0", "x2"]]) +def test_get_input_data_selection(experimentdata_expected_no_output: ExperimentData, selection: Iterable[str] | str): + input_data = experimentdata_expected_no_output.get_input_data(selection) + df, _ = input_data.to_pandas() + if isinstance(selection, str): + selection = [selection] + selected_pd = pd_input()[selection] + pd.testing.assert_frame_equal( + df, selected_pd, check_dtype=False, atol=1e-6) + + +def test_get_output_data(experimentdata_expected: ExperimentData): + output_data = experimentdata_expected.get_output_data() + _, df = output_data.to_pandas() + pd.testing.assert_frame_equal(df, pd_output(), check_dtype=False) + assert experimentdata_expected._output_data == output_data._output_data + + +@pytest.mark.parametrize("selection", ["y", ["y"]]) +def test_get_output_data_selection(experimentdata_expected: ExperimentData, selection: Iterable[str] | str): + output_data = experimentdata_expected.get_output_data(selection) + _, df = output_data.to_pandas() + if isinstance(selection, str): + selection = [selection] + selected_pd = pd_output()[selection] + pd.testing.assert_frame_equal(df, selected_pd, check_dtype=False) + + +def test_iter_behaviour(experimentdata_continuous: ExperimentData): + for i in experimentdata_continuous: + assert isinstance(i, ExperimentSample) + + selected_experimentdata = experimentdata_continuous.select([0, 2, 4]) + for i in selected_experimentdata: + assert isinstance(i, ExperimentSample) + + +def test_select_with_status_open(experimentdata: ExperimentData): + selected_data = experimentdata.select_with_status('open') + assert all(job == Status.OPEN for job in selected_data._jobs.jobs) + + +def test_select_with_status_in_progress(experimentdata: ExperimentData): + selected_data = experimentdata.select_with_status('in progress') + assert all(job == Status.IN_PROGRESS for job in selected_data._jobs.jobs) + + +def test_select_with_status_finished(experimentdata: ExperimentData): + selected_data = experimentdata.select_with_status('finished') + assert all(job == Status.FINISHED for job in selected_data._jobs.jobs) + + +def test_select_with_status_error(experimentdata: ExperimentData): + selected_data = experimentdata.select_with_status('error') + assert all(job == Status.ERROR for job in selected_data._jobs.jobs) + + +def test_select_with_status_invalid_status(experimentdata: ExperimentData): + with pytest.raises(ValueError): + _ = experimentdata.select_with_status('invalid_status') + + +if __name__ == "__main__": # pragma: no cover + pytest.main() diff --git a/tests/newdata/test_data.py b/tests/newdata/test_data.py index fb5f0cba..6c5abe52 100644 --- a/tests/newdata/test_data.py +++ b/tests/newdata/test_data.py @@ -1,5 +1,4 @@ -from copy import deepcopy -from typing import Any, Dict, List +from typing import Any, 
Dict import numpy as np import pandas as pd @@ -175,6 +174,14 @@ def test_select_columns_single(): assert selected_data.data == expected_data +def test_rename_columns(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + data.rename_columns({"a": "x", "b": "y"}) + expected_data = {0: {"x": 1, "y": 2, "c": 3}, 1: {"x": 4, "y": 5, "c": 6}} + assert data.data == expected_data + + def test_drop(): input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} data = _Data(input_data) From 74fd3154fe3a470eec433fa2c011a87f5bd6b074 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Tue, 25 Jun 2024 15:57:42 +0200 Subject: [PATCH 11/17] Refactor domain initialization and data loading --- src/f3dasm/_src/design/domain.py | 47 ++- src/f3dasm/_src/experimentdata/_data.py | 10 +- .../_experimental/_jobqueue2.py | 267 ++++++++++++------ .../experimentdata/_experimental/_newdata2.py | 5 +- .../_experimental/_newexperimentdata2.py | 22 +- .../_src/experimentdata/experimentdata.py | 22 +- tests/newdata/test_data.py | 2 +- 7 files changed, 257 insertions(+), 118 deletions(-) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index 172d9ce1..9c31b9df 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -14,7 +14,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import (Any, Dict, Iterable, Iterator, List, Literal, Optional, - Sequence, Type) + Protocol, Sequence, Type) # Third-party core import numpy as np @@ -36,6 +36,13 @@ # ============================================================================= +class _Data(Protocol): + def to_dataframe(self) -> pd.DataFrame: + ... + +# ============================================================================= + + @dataclass class Domain: """Main class for defining the domain of the design of experiments. @@ -238,6 +245,26 @@ def from_dataframe(cls, df_input: pd.DataFrame, return cls(space=input_space, output_space=output_space) + @classmethod + def from_data(cls: Type[Domain], + input_data: _Data, output_data: _Data) -> Domain: + """Initializes a Domain from input and output data. + + Parameters + ---------- + input_data : _Data + Input data. + output_data : _Data + Output data. 
+ + Returns + ------- + Domain + Domain object + """ + return cls.from_dataframe( + input_data.to_dataframe(), output_data.to_dataframe()) + # Export # ============================================================================= @@ -645,9 +672,7 @@ def make_nd_continuous_domain(bounds: np.ndarray | List[List[float]], return Domain(space) -def _domain_factory(domain: Domain | DictConfig | None, - input_data: pd.DataFrame, - output_data: pd.DataFrame) -> Domain: +def _domain_factory(domain: Domain | DictConfig | str | Path) -> Domain: if isinstance(domain, Domain): return domain @@ -657,14 +682,14 @@ def _domain_factory(domain: Domain | DictConfig | None, elif isinstance(domain, DictConfig): return Domain.from_yaml(domain) - elif (input_data.empty and output_data.empty and domain is None): - return Domain() + # elif (input_data.empty and output_data.empty and domain is None): + # return Domain() - elif domain is None: - return Domain.from_dataframe( - input_data, output_data) + # elif domain is None: + # return Domain.from_dataframe( + # input_data, output_data) else: raise TypeError( - f"Domain must be of type Domain, DictConfig " - f"or None, not {type(domain)}") + f"Domain must be of type Domain, DictConfig, str or Path, " + f"not {type(domain)}") diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 3817cda3..0721396b 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -186,7 +186,8 @@ def from_file(cls, filename: Path | str) -> _Data: return cls(df, columns=_Columns(_columns)) @classmethod - def from_numpy(cls: Type[_Data], array: np.ndarray) -> _Data: + def from_numpy(cls: Type[_Data], + array: np.ndarray, keys: Iterable[str]) -> _Data: """Loads the data from a numpy array. Parameters @@ -458,7 +459,8 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: return _Data(data=df, columns=_Columns(_columns)) -def _data_factory(data: DataTypes) -> _Data: +def _data_factory(data: DataTypes, + keys: Optional[Iterable[str]] = None) -> _Data: if data is None: return _Data() @@ -469,10 +471,10 @@ def _data_factory(data: DataTypes) -> _Data: return _Data.from_dataframe(data) elif isinstance(data, (Path, str)): - return _Data.from_file(data) + return _Data.from_file(Path(data)) elif isinstance(data, np.ndarray): - return _Data.from_numpy(data) + return _Data.from_numpy(data, keys=keys) else: raise TypeError( diff --git a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py index 82721ace..8e10f4ac 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_jobqueue2.py @@ -35,6 +35,8 @@ class Status(str, Enum): def __str__(self) -> str: return self.value +# ============================================================================= + class NoOpenJobsError(Exception): """ @@ -52,6 +54,14 @@ def __init__(self, message): class Index: def __init__(self, jobs: pd.Series | None | str = None): + """ + Initializes the Index object. + + Parameters + ---------- + jobs : pd.Series, None, or str, optional + Series of jobs, None, or a single job as a string. + """ if isinstance(jobs, str): self.jobs = pd.Series(jobs, index=[0], dtype='string') @@ -62,9 +72,30 @@ def __init__(self, jobs: pd.Series | None | str = None): self.jobs = jobs def __len__(self) -> int: + """ + Returns the number of jobs. + + Returns + ------- + int + Number of jobs. 
+ """ return len(self.jobs) def __add__(self, __o: Index | str) -> Index: + """ + Adds another Index or a string to this Index. + + Parameters + ---------- + __o : Index or str + Another Index object or a string representing a job. + + Returns + ------- + Index + A new Index object containing the combined jobs. + """ if isinstance(__o, str): __o = Index(__o) @@ -73,135 +104,212 @@ def __add__(self, __o: Index | str) -> Index: # Make a copy of other.jobs and modify its index other_jobs_copy = deepcopy(__o) - other_jobs_copy.jobs.index = pd.Index(range( - len(other_jobs_copy))) + self.jobs.index[-1] + 1 + other_jobs_copy.jobs.index = pd.Index( + range(len(other_jobs_copy))) + self.jobs.index[-1] + 1 return Index(pd.concat([self.jobs, other_jobs_copy.jobs])) def __getitem__(self, indices: int | slice | Iterable[int]) -> Index: + """ + Gets a subset of jobs by indices. + + Parameters + ---------- + indices : int, slice, or Iterable[int] + Indices to get. + + Returns + ------- + Index + A new Index object containing the selected jobs. + """ if isinstance(indices, int): indices = [indices] return Index(self.jobs[indices].copy()) def __eq__(self, __o: Index) -> bool: + """ + Checks if this Index is equal to another Index. + + Parameters + ---------- + __o : Index + Another Index object to compare. + + Returns + ------- + bool + True if the two Index objects are equal, False otherwise. + """ return self.jobs.equals(__o.jobs) def _repr_html_(self) -> str: + """ + Returns an HTML representation of the jobs. + + Returns + ------- + str + HTML representation of the jobs. + """ return self.jobs.__repr__() @property def indices(self) -> pd.Index: - """The indices of the jobs.""" + """ + The indices of the jobs. + + Returns + ------- + pd.Index + The indices of the jobs. + """ return self.jobs.index - def iloc(self, indices: Iterable[int]) -> Iterable[int]: + def iloc(self, indices: Iterable[int] | int) -> Iterable[int]: + """ + Gets the position of the given indices in the jobs. + + Parameters + ---------- + indices : Iterable[int] or int + Indices to locate. + + Returns + ------- + Iterable[int] + Positions of the given indices. + """ + if isinstance(indices, int): + indices = [indices] return self.indices.get_indexer(indices) - # Alternative Constructors - # ========================================================================= + def is_all_finished(self) -> bool: + """ + Checks if all jobs are finished. + + Returns + ------- + bool + True if all jobs are finished, False otherwise. + """ + return all(self.jobs.isin([Status.FINISHED, Status.ERROR])) @classmethod def from_data(cls: Type[Index], data: _Data, value: str = Status.OPEN) -> Index: - """Create a JobQueue object from a Data object. + """ + Create an Index object from a Data object. Parameters ---------- - data : Data + data : _Data Data object containing the data. - value : str + value : str, optional The value to assign to the jobs. Can be 'open', - 'in progress', 'finished', or 'error'. + 'in_progress', 'finished', or 'error'. Default is 'open'. Returns ------- - JobQueue - JobQueue object containing the loaded data. + Index + Index object containing the loaded data. """ return cls(pd.Series([value] * len(data), dtype='string')) @classmethod def from_file(cls: Type[Index], filename: Path | str) -> Index: - """Create a JobQueue object from a pickle file. + """ + Create an Index object from a pickle file. Parameters ---------- - filename : Path | str + filename : Path or str Name of the file. 
Returns ------- - JobQueue - JobQueue object containing the loaded data. + Index + Index object containing the loaded data. + + Raises + ------ + FileNotFoundError + If the specified file does not exist. """ - # Convert filename to Path if Path(filename).with_suffix('.csv').exists(): - return cls( - pd.read_csv(Path(filename).with_suffix('.csv'), - index_col=0)['0']) - + return cls(pd.read_csv(Path(filename).with_suffix('.csv'), + index_col=0)['0']) elif Path(filename).with_suffix('.pkl').exists(): - return cls( - pd.read_pickle(Path(filename).with_suffix('.pkl'))) - + return cls(pd.read_pickle(Path(filename).with_suffix('.pkl'))) else: raise FileNotFoundError(f"Jobfile {filename} does not exist.") - # Select - # ========================================================================= - def select_all(self, status: str) -> Index: - """Selects all jobs with a certain status. + """ + Selects all jobs with a certain status. Parameters ---------- status : str - Status of the jobs to select + Status of the jobs to select. Returns ------- - JobQueue - JobQueue object containing the selected jobs. + Index + Index object containing the selected jobs. """ return Index(self.jobs[self.jobs == status]) - # Export - # ========================================================================= - def store(self, filename: Path) -> None: - """Stores the jobs in a pickle file. + """ + Stores the jobs in a pickle file. Parameters ---------- filename : Path Path of the file. """ - self.jobs.to_csv(filename.with_suffix('.csv')) + self.jobs.to_pickle(filename.with_suffix('.pkl')) + # self.jobs.to_csv(filename.with_suffix('.csv')) def to_dataframe(self, name: str = "") -> pd.DataFrame: - """Converts the job queue to a DataFrame. + """ + Converts the job queue to a DataFrame. Parameters ---------- name : str, optional - Name of the column, by default "". - - Note - ---- - If the name is not specified, the column name will be an empty string + Name of the column. Default is an empty string. Returns ------- - DataFrame + pd.DataFrame DataFrame containing the jobs. """ - return self.jobs.to_frame("") + return self.jobs.to_frame(name) - # Append and remove jobs - # ========================================================================= + def get_open_job(self) -> int: + """ + Returns the index of an open job. - def remove(self, indices: List[int]): - """Removes a subset of the jobs. + Returns + ------- + int + Index of an open job. + + Raises + ------ + NoOpenJobsError + If no open jobs are found. + """ + try: + return int(self.jobs[self.jobs == Status.OPEN].index[0]) + except IndexError: + raise NoOpenJobsError("No open jobs found.") + + def remove(self, indices: List[int]) -> None: + """ + Removes a subset of the jobs. Parameters ---------- @@ -210,69 +318,56 @@ def remove(self, indices: List[int]): """ self.jobs = self.jobs.drop(indices) - def overwrite( - self, indices: Iterable[int], - other: Index | str) -> None: + def overwrite(self, indices: Iterable[int], other: Index | str) -> None: + """ + Overwrites the jobs at the specified indices with new jobs. + Parameters + ---------- + indices : Iterable[int] + Indices to overwrite. + other : Index or str + New jobs to overwrite with. 
+        """
         if isinstance(other, str):
-            other = Index(
-                pd.Series([other], index=[0], dtype='string'))
+            other = Index(pd.Series([other], index=[0], dtype='string'))
 
         self.jobs.update(other.jobs.set_axis(indices))
 
-    # Mark
-    # =========================================================================
-
     def mark(self, index: int | slice | Iterable[int],
              status: Status) -> None:
-        """Marks a job with a certain status.
+        """
+        Marks a job with a certain status.
 
         Parameters
         ----------
-        index : int
+        index : int, slice, or Iterable[int]
             Index of the job to mark.
-        status : str
+        status : Status
             Status to mark the job with.
         """
         self.jobs.loc[index] = status
 
     def mark_all_in_progress_open(self) -> None:
-        """Marks all jobs as 'open'."""
+        """
+        Marks all 'in progress' jobs as 'open'.
+        """
         self.jobs = self.jobs.replace(Status.IN_PROGRESS, Status.OPEN)
 
     def mark_all_error_open(self) -> None:
-        """Marks all jobs as 'open'."""
-        self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN)
-    # Miscellanous
-    # =========================================================================
-
-    def is_all_finished(self) -> bool:
-        """Checks if all jobs are finished.
-
-        Returns
-        -------
-        bool
-            True if all jobs are finished, False otherwise.
         """
-        return all(self.jobs.isin([Status.FINISHED, Status.ERROR]))
-
-    def get_open_job(self) -> int:
-        """Returns the index of an open job.
-
-        Returns
-        -------
-        int
-            Index of an open job.
+        Marks all 'error' jobs as 'open'.
         """
-        try:  # try to find an open job
-            return int(self.jobs[self.jobs == Status.OPEN].index[0])
-        except IndexError:
-            raise NoOpenJobsError("No open jobs found.")
+        self.jobs = self.jobs.replace(Status.ERROR, Status.OPEN)
 
     def reset_index(self) -> None:
-        """Resets the index of the jobs."""
+        """
+        Resets the index of the jobs.
+        """
         self.jobs.reset_index(drop=True, inplace=True)
 
+# =============================================================================
+
 
 def _jobs_factory(jobs: Path | str | Index | None, input_data: _Data,
                   output_data: _Data, job_value: Status) -> Index:
     """Creates a Index object from particular inpute
diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
index 26df0982..c0cc9745 100644
--- a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
+++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py
@@ -221,7 +221,8 @@ def from_file(cls, filename: Path) -> _Data:
         _Data
             The created _Data object.
         """
-        ...
+        df = pd.read_csv(filename.with_suffix('.csv'), header=0, index_col=0)
+        return cls.from_dataframe(df)
 
     @classmethod
     def from_numpy(cls: Type[_Data], array: np.ndarray,
@@ -322,7 +323,7 @@ def store(self, filename: Path):
         filename : Path
             The file to store the data in.
         """
-        ...
+        self.to_dataframe().to_csv(filename.with_suffix('.csv'))
 
     def get_data_dict(self, row: int) -> Dict[str, Any]:
         """
diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
index 7851f30f..57e151ae 100644
--- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
+++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py
@@ -110,12 +110,20 @@ def __init__(self,
 
         self.project_dir = _project_dir_factory(project_dir)
 
-        if isinstance(input_data, np.ndarray) and isinstance(domain, Domain):
-            self._input_data = _data_factory(input_data, domain.names)
-            self._output_data = _data_factory(output_data, domain.output_names)
+        # DOMAIN
+        if domain is None:
+            self.domain = Domain.from_data(
+                input_data=_data_factory(input_data),
+                output_data=_data_factory(output_data))
+
-        else:
-            self._input_data = _data_factory(input_data)
-            self._output_data = _data_factory(output_data)
+        else:
+            self.domain = _domain_factory(domain=domain)
+
+        # INPUT AND OUTPUT DATA
+        self._input_data = _data_factory(
+            data=input_data, keys=self.domain.names)
+        self._output_data = _data_factory(
+            data=output_data, keys=self.domain.output_names)
 
         # Create empty output_data from indices if output_data is empty
         if self._output_data.is_empty():
@@ -125,10 +133,6 @@ def __init__(self,
         else:
             job_value = Status.FINISHED
 
-        self.domain = _domain_factory(
-            domain=domain, input_data=self._input_data.to_dataframe(),
-            output_data=self._output_data.to_dataframe())
-
         # Create empty input_data from domain if input_data is empty
         if self._input_data.is_empty():
             self._input_data = _Data()
diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py
index f053fdc5..72f66d0e 100644
--- a/src/f3dasm/_src/experimentdata/experimentdata.py
+++ b/src/f3dasm/_src/experimentdata/experimentdata.py
@@ -110,8 +110,20 @@ def __init__(self,
 
         self.project_dir = _project_dir_factory(project_dir)
 
-        self._input_data = _data_factory(input_data)
-        self._output_data = _data_factory(output_data)
+        # DOMAIN
+        if domain is None:
+            self.domain = Domain.from_data(
+                input_data=_data_factory(input_data),
+                output_data=_data_factory(output_data))
+
+        else:
+            self.domain = _domain_factory(domain=domain)
+
+        # INPUT AND OUTPUT DATA
+        self._input_data = _data_factory(
+            data=input_data, keys=self.domain.names)
+        self._output_data = _data_factory(
+            data=output_data, keys=self.domain.output_names)
 
         # Create empty output_data from indices if output_data is empty
         if self._output_data.is_empty():
@@ -121,9 +133,9 @@ def __init__(self,
         else:
             job_value = Status.FINISHED
 
-        self.domain = _domain_factory(
-            domain=domain, input_data=self._input_data.to_dataframe(),
-            output_data=self._output_data.to_dataframe())
+        # self.domain = _domain_factory(
+        #     domain=domain, input_data=self._input_data.to_dataframe(),
+        #     output_data=self._output_data.to_dataframe())
 
         # Create empty input_data from domain if input_data is empty
         if self._input_data.is_empty():
diff --git a/tests/newdata/test_data.py b/tests/newdata/test_data.py
index 6c5abe52..644496ac 100644
--- a/tests/newdata/test_data.py
+++ b/tests/newdata/test_data.py
@@ -115,7 +115,7 @@ def test_len():
 
 def test_indices():
     data = _Data({0: {"a": 1}, 1: {"a": 2}})
-    assert data.indices == [0, 1]
+    assert data.indices.equals(pd.Index([0, 1]))
 
 
 def test_names():
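Note on the refactor in PATCH 11 above: initialization is now domain-first.
The Domain is resolved before any data is parsed (inferred from the raw data
via Domain.from_data when `domain is None`), and the input/output containers
are then built with the domain's column names as keys. A practical
consequence is that a bare numpy array, which carries no column names of its
own, can only be interpreted together with a domain; the tests in this series
expect a ValueError when an array is passed without one. A minimal sketch of
the intended call pattern (assuming the public f3dasm API; illustrative only,
not part of the patch):

    import numpy as np

    from f3dasm import ExperimentData
    from f3dasm.design import make_nd_continuous_domain

    domain = make_nd_continuous_domain(
        bounds=np.array([[0., 1.], [0., 1.], [0., 1.]]), dimensionality=3)
    x = np.random.default_rng(42).random((10, 3))

    # the domain supplies the column keys ('x0', 'x1', 'x2') for the array
    data = ExperimentData(domain=domain, input_data=x)

    # ExperimentData(input_data=x) without a domain is expected to raise a
    # ValueError, since there are no names to attach to the columns
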
<61459087+mpvanderschelling@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:06:19 +0200 Subject: [PATCH 12/17] Added tmp file creation after storing. Fixes #273 --- src/f3dasm/_src/design/domain.py | 5 ++- src/f3dasm/_src/experimentdata/_data.py | 5 ++- src/f3dasm/_src/experimentdata/_io.py | 42 +++++++++++++++++++ src/f3dasm/_src/experimentdata/_jobqueue.py | 5 ++- .../_src/experimentdata/experimentdata.py | 6 ++- 5 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index 9c31b9df..930a36be 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -276,9 +276,12 @@ def store(self, filename: Path) -> None: filename : str Name of the file. """ - with open(filename.with_suffix('.pkl'), 'wb') as f: + with open(filename.with_suffix('.tmp'), 'wb') as f: pickle.dump(self, f) + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) + def _cast_types_dataframe(self) -> dict: """Make a dictionary that provides the datatype of each parameter""" return {name: parameter._type for diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 0721396b..849326f9 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -264,7 +264,10 @@ def store(self, filename: Path) -> None: The data is stored as a csv file. """ # TODO: The column information is not saved in the .csv! - self.to_dataframe().to_csv(filename.with_suffix('.csv')) + self.to_dataframe().to_csv(filename.with_suffix('.tmp')) + + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.csv')) def n_best_samples(self, nosamples: int, column_name: List[str] | str) -> pd.DataFrame: diff --git a/src/f3dasm/_src/experimentdata/_io.py b/src/f3dasm/_src/experimentdata/_io.py index f602dbac..385f495e 100644 --- a/src/f3dasm/_src/experimentdata/_io.py +++ b/src/f3dasm/_src/experimentdata/_io.py @@ -11,6 +11,7 @@ # Standard import pickle from pathlib import Path +from time import sleep from typing import Any, Mapping, Optional, Type # Third-party @@ -43,6 +44,13 @@ RESOLUTION_MATPLOTLIB_FIGURE = 300 MAX_TRIES = 10 +# Exceptions +# ============================================================================= + + +class TemporaryFilesNotCleared(Exception): + pass + # Storing methods # ============================================================================= @@ -365,3 +373,37 @@ def _project_dir_factory(project_dir: Path | str | None) -> Path: raise TypeError( f"project_dir must be of type Path, str or None, \ not {type(project_dir).__name__}") + + +def check_for_temporary_files(directory: Path, delay: float = 0.3): + """ + Check if there are any .tmp files in the subdirectory. + + Parameters + ---------- + subdirectory : Path + subdirectory to check for temporary files + delay : float, optional + delay between checks, by default 0.3 + + Raises + ------ + TemporaryFilesNotCleared + Raises if temporary files are found after the maximum number of tries + """ + for attempt in range(MAX_TRIES): + if not any(directory.glob('*.tmp')): + logger.debug(( + f"No temporary files found in {directory} after " + f"{attempt + 1} tries.") + ) + break + logger.debug(( + f"Temporary files found in {directory} after {attempt + 1} " + f"tries. 
Waiting {delay} seconds before checking again.") + ) + sleep(delay) + else: + raise TemporaryFilesNotCleared(( + f"Temporary files found in {directory} after {MAX_TRIES} tries." + )) diff --git a/src/f3dasm/_src/experimentdata/_jobqueue.py b/src/f3dasm/_src/experimentdata/_jobqueue.py index 79264ce1..2f83cefe 100644 --- a/src/f3dasm/_src/experimentdata/_jobqueue.py +++ b/src/f3dasm/_src/experimentdata/_jobqueue.py @@ -197,7 +197,10 @@ def store(self, filename: Path) -> None: filename : Path Path of the file. """ - self.jobs.to_pickle(filename.with_suffix('.pkl')) + self.jobs.to_pickle(filename.with_suffix('.tmp')) + + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) def to_dataframe(self, name: str = "") -> pd.DataFrame: """Converts the job queue to a DataFrame. diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 72f66d0e..ff5086d7 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -38,7 +38,8 @@ from ._data import DataTypes, _Data, _data_factory from ._io import (DOMAIN_FILENAME, EXPERIMENTDATA_SUBFOLDER, INPUT_DATA_FILENAME, JOBS_FILENAME, LOCK_FILENAME, MAX_TRIES, - OUTPUT_DATA_FILENAME, _project_dir_factory) + OUTPUT_DATA_FILENAME, _project_dir_factory, + check_for_temporary_files) from ._jobqueue import NoOpenJobsError, Status, _jobs_factory from .experimentsample import ExperimentSample from .samplers import Sampler, SamplerNames, _sampler_factory @@ -394,6 +395,9 @@ def _from_file_attempt(cls: Type[ExperimentData], """ subdirectory = project_dir / EXPERIMENTDATA_SUBFOLDER + # check if there is any .tmp file in the subdirectory + check_for_temporary_files(subdirectory) + try: return cls(domain=subdirectory / DOMAIN_FILENAME, input_data=subdirectory / INPUT_DATA_FILENAME, From 06789592a225002422409b10e5872c3f9c0ba01e Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Wed, 26 Jun 2024 13:11:55 +0200 Subject: [PATCH 13/17] Remove old files and rename files with correct extensions --- src/f3dasm/_src/design/domain.py | 3 +++ src/f3dasm/_src/experimentdata/_data.py | 3 +++ src/f3dasm/_src/experimentdata/_jobqueue.py | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py index 930a36be..29c36b96 100644 --- a/src/f3dasm/_src/design/domain.py +++ b/src/f3dasm/_src/design/domain.py @@ -279,6 +279,9 @@ def store(self, filename: Path) -> None: with open(filename.with_suffix('.tmp'), 'wb') as f: pickle.dump(self, f) + # remove old file if it exists + filename.with_suffix('.pkl').unlink(missing_ok=True) + # rename the file to the correct extension filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 849326f9..1a17448b 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -266,6 +266,9 @@ def store(self, filename: Path) -> None: # TODO: The column information is not saved in the .csv! 
         self.to_dataframe().to_csv(filename.with_suffix('.tmp'))
 
+        # remove the old file if it exists
+        filename.with_suffix('.csv').unlink(missing_ok=True)
+
         # rename the file to the correct extension
         filename.with_suffix('.tmp').rename(filename.with_suffix('.csv'))
 
diff --git a/src/f3dasm/_src/experimentdata/_jobqueue.py b/src/f3dasm/_src/experimentdata/_jobqueue.py
index 2f83cefe..34818226 100644
--- a/src/f3dasm/_src/experimentdata/_jobqueue.py
+++ b/src/f3dasm/_src/experimentdata/_jobqueue.py
@@ -199,6 +199,9 @@ def store(self, filename: Path) -> None:
         """
         self.jobs.to_pickle(filename.with_suffix('.tmp'))
 
+        # remove old file if it exists
+        filename.with_suffix('.pkl').unlink(missing_ok=True)
+
         # rename the file to the correct extension
         filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl'))
 
From 5070d70b801db5b8d656019d5e6ba625b844011e Mon Sep 17 00:00:00 2001
From: Martin van der Schelling
 <61459087+mpvanderschelling@users.noreply.github.com>
Date: Wed, 26 Jun 2024 13:39:14 +0200
Subject: [PATCH 14/17] Remove retrieval of updated experimentdata in cluster
 mode

---
 src/f3dasm/_src/experimentdata/experimentdata.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py
index ff5086d7..ca700764 100644
--- a/src/f3dasm/_src/experimentdata/experimentdata.py
+++ b/src/f3dasm/_src/experimentdata/experimentdata.py
@@ -1296,10 +1296,13 @@ def _run_cluster(self, data_generator: DataGenerator, kwargs: dict):
         NoOpenJobsError
             Raised when there are no open jobs left
         """
-        # Retrieve the updated experimentdata object from disc
-        try:
-            self = self.from_file(self.project_dir)
-        except FileNotFoundError:  # If not found, store current
+        # # Retrieve the updated experimentdata object from disc
+        # try:
+        #     self = self.from_file(self.project_dir)
+        # except FileNotFoundError:  # If not found, store current
+        #     self.store()
+
+        if not (self.project_dir / EXPERIMENTDATA_SUBFOLDER).exists():
             self.store()
 
         while True:
From ca4606cdbfd01dd33201183c1bd8964d87c5e6b0 Mon Sep 17 00:00:00 2001
From: Martin van der Schelling
 <61459087+mpvanderschelling@users.noreply.github.com>
Date: Wed, 26 Jun 2024 14:01:41 +0200
Subject: [PATCH 15/17] Add optional parameter to store methods

---
 src/f3dasm/_src/design/domain.py              | 18 +++++++++++-------
 src/f3dasm/_src/experimentdata/_data.py       | 19 ++++++++++++-------
 src/f3dasm/_src/experimentdata/_jobqueue.py   | 15 +++++++++------
 .../_src/experimentdata/experimentdata.py     | 19 ++++++++++++++----
 4 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/src/f3dasm/_src/design/domain.py b/src/f3dasm/_src/design/domain.py
index 29c36b96..c88acc14 100644
--- a/src/f3dasm/_src/design/domain.py
+++ b/src/f3dasm/_src/design/domain.py
@@ -268,7 +268,7 @@ def from_data(cls: Type[Domain],
 
 # Export
 # =============================================================================
 
-    def store(self, filename: Path) -> None:
+    def store(self, filename: Path, create_tmp: bool = False) -> None:
         """Stores the Domain in a pickle file.
 
         Parameters
         ----------
         filename : str
             Name of the file.
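+        create_tmp : bool, optional
+            If True, store to a '.tmp' file first and rename it to the
+            final extension afterwards, by default False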
""" - with open(filename.with_suffix('.tmp'), 'wb') as f: - pickle.dump(self, f) + if create_tmp: + with open(filename.with_suffix('.tmp'), 'wb') as f: + pickle.dump(self, f) - # remove old file if it exists - filename.with_suffix('.pkl').unlink(missing_ok=True) + # remove old file if it exists + filename.with_suffix('.pkl').unlink(missing_ok=True) - # rename the file to the correct extension - filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) + else: + with open(filename.with_suffix('.pkl'), 'wb') as f: + pickle.dump(self, f) def _cast_types_dataframe(self) -> dict: """Make a dictionary that provides the datatype of each parameter""" diff --git a/src/f3dasm/_src/experimentdata/_data.py b/src/f3dasm/_src/experimentdata/_data.py index 1a17448b..137ff3a8 100644 --- a/src/f3dasm/_src/experimentdata/_data.py +++ b/src/f3dasm/_src/experimentdata/_data.py @@ -251,7 +251,7 @@ def to_dataframe(self) -> pd.DataFrame: df.columns = self.names return df.astype(object) - def store(self, filename: Path) -> None: + def store(self, filename: Path, create_tmp: bool = False) -> None: """Stores the data to a file. Parameters @@ -263,14 +263,19 @@ def store(self, filename: Path) -> None: ---- The data is stored as a csv file. """ - # TODO: The column information is not saved in the .csv! - self.to_dataframe().to_csv(filename.with_suffix('.tmp')) - # remove the old file if it exists - filename.with_suffix('.csv').unlink(missing_ok=True) + if create_tmp: + self.to_dataframe().to_csv(filename.with_suffix('.tmp')) - # rename the file to the correct extension - filename.with_suffix('.tmp').rename(filename.with_suffix('.csv')) + # remove the old file if it exists + filename.with_suffix('.csv').unlink(missing_ok=True) + + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.csv')) + + else: + # TODO: The column information is not saved in the .csv! + self.to_dataframe().to_csv(filename.with_suffix('.csv')) def n_best_samples(self, nosamples: int, column_name: List[str] | str) -> pd.DataFrame: diff --git a/src/f3dasm/_src/experimentdata/_jobqueue.py b/src/f3dasm/_src/experimentdata/_jobqueue.py index 34818226..63f105af 100644 --- a/src/f3dasm/_src/experimentdata/_jobqueue.py +++ b/src/f3dasm/_src/experimentdata/_jobqueue.py @@ -189,7 +189,7 @@ def select_all(self, status: str) -> _JobQueue: # Export # ========================================================================= - def store(self, filename: Path) -> None: + def store(self, filename: Path, create_tmp: bool = False) -> None: """Stores the jobs in a pickle file. Parameters @@ -197,13 +197,16 @@ def store(self, filename: Path) -> None: filename : Path Path of the file. """ - self.jobs.to_pickle(filename.with_suffix('.tmp')) + if create_tmp: + self.jobs.to_pickle(filename.with_suffix('.tmp')) - # remove old file if it exists - filename.with_suffix('.pkl').unlink(missing_ok=True) + # remove old file if it exists + filename.with_suffix('.pkl').unlink(missing_ok=True) - # rename the file to the correct extension - filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) + # rename the file to the correct extension + filename.with_suffix('.tmp').rename(filename.with_suffix('.pkl')) + else: + self.jobs.to_pickle(filename.with_suffix('.pkl')) def to_dataframe(self, name: str = "") -> pd.DataFrame: """Converts the job queue to a DataFrame. 
diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index ca700764..8deb2fff 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -551,7 +551,8 @@ def get_output_data(self, # Export # ========================================================================= - def store(self, project_dir: Optional[Path | str] = None): + def store(self, project_dir: Optional[Path | str] = None, + create_tmp: bool = False): """Write the ExperimentData to disk in the project directory. Parameters @@ -589,10 +590,18 @@ def store(self, project_dir: Optional[Path | str] = None): # Create the subdirectory if it does not exist subdirectory.mkdir(parents=True, exist_ok=True) - self._input_data.store(subdirectory / Path(INPUT_DATA_FILENAME)) - self._output_data.store(subdirectory / Path(OUTPUT_DATA_FILENAME)) - self._jobs.store(subdirectory / Path(JOBS_FILENAME)) - self.domain.store(subdirectory / Path(DOMAIN_FILENAME)) + self._input_data.store( + filename=subdirectory / Path(INPUT_DATA_FILENAME), + create_tmp=create_tmp) + self._output_data.store( + filename=subdirectory / Path(OUTPUT_DATA_FILENAME), + create_tmp=create_tmp) + self._jobs.store( + filename=subdirectory / Path(JOBS_FILENAME), + create_tmp=create_tmp) + self.domain.store( + filename=subdirectory / Path(DOMAIN_FILENAME), + create_tmp=create_tmp) def to_numpy(self) -> Tuple[np.ndarray, np.ndarray]: """ From f63e1d30443e7eb54458c5723a488a051213e500 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:16:12 +0200 Subject: [PATCH 16/17] Fix reading empty Pandas DataFrame error --- src/f3dasm/_src/experimentdata/_io.py | 4 +++ .../_src/experimentdata/experimentdata.py | 28 +++++++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/_io.py b/src/f3dasm/_src/experimentdata/_io.py index 385f495e..b4a0d92f 100644 --- a/src/f3dasm/_src/experimentdata/_io.py +++ b/src/f3dasm/_src/experimentdata/_io.py @@ -51,6 +51,10 @@ class TemporaryFilesNotCleared(Exception): pass + +class ReadingEmptyPandasDataFrameError(Exception): + pass + # Storing methods # ============================================================================= diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index 8deb2fff..f6d1ac49 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -38,8 +38,8 @@ from ._data import DataTypes, _Data, _data_factory from ._io import (DOMAIN_FILENAME, EXPERIMENTDATA_SUBFOLDER, INPUT_DATA_FILENAME, JOBS_FILENAME, LOCK_FILENAME, MAX_TRIES, - OUTPUT_DATA_FILENAME, _project_dir_factory, - check_for_temporary_files) + OUTPUT_DATA_FILENAME, ReadingEmptyPandasDataFrameError, + _project_dir_factory, check_for_temporary_files) from ._jobqueue import NoOpenJobsError, Status, _jobs_factory from .experimentsample import ExperimentSample from .samplers import Sampler, SamplerNames, _sampler_factory @@ -398,15 +398,21 @@ def _from_file_attempt(cls: Type[ExperimentData], # check if there is any .tmp file in the subdirectory check_for_temporary_files(subdirectory) - try: - return cls(domain=subdirectory / DOMAIN_FILENAME, - input_data=subdirectory / INPUT_DATA_FILENAME, - output_data=subdirectory / OUTPUT_DATA_FILENAME, - jobs=subdirectory / JOBS_FILENAME, - project_dir=project_dir) - except 
FileNotFoundError: - raise FileNotFoundError( - f"Cannot find the files from {subdirectory}.") + for attempt in range(MAX_TRIES): + try: + return cls(domain=subdirectory / DOMAIN_FILENAME, + input_data=subdirectory / INPUT_DATA_FILENAME, + output_data=subdirectory / OUTPUT_DATA_FILENAME, + jobs=subdirectory / JOBS_FILENAME, + project_dir=project_dir) + except FileNotFoundError: + raise FileNotFoundError( + f"Cannot find the files from {subdirectory}.") + except pd.errors.EmptyDataError: + sleep(1) + continue + + raise ReadingEmptyPandasDataFrameError(f"Reading empty dataframes") # Selecting subsets # ========================================================================= From f27235a18e038dc7990e4908a96ced4b55c5e8dc Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:17:24 +0200 Subject: [PATCH 17/17] flake8 fix --- src/f3dasm/_src/experimentdata/experimentdata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/f3dasm/_src/experimentdata/experimentdata.py b/src/f3dasm/_src/experimentdata/experimentdata.py index f6d1ac49..be63b7d2 100644 --- a/src/f3dasm/_src/experimentdata/experimentdata.py +++ b/src/f3dasm/_src/experimentdata/experimentdata.py @@ -412,7 +412,7 @@ def _from_file_attempt(cls: Type[ExperimentData], sleep(1) continue - raise ReadingEmptyPandasDataFrameError(f"Reading empty dataframes") + raise ReadingEmptyPandasDataFrameError("Reading empty dataframes") # Selecting subsets # =========================================================================
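Taken together, patches 12 through 17 implement one storage protocol: every `store()` first writes a `.tmp` file, unlinks the stale target, and renames the temporary file into place, while every reader waits for `.tmp` files to clear and retries reads that catch a file mid-replacement. The sketch below restates that write/read handshake as standalone code. It is a minimal illustration, not f3dasm API: the names `atomic_store`, `patient_read` and `DELAY`, and the closing `TimeoutError`, are invented for this example, and retrying on `FileNotFoundError` is an extra safeguard for the unlink-to-rename window that the patches themselves do not add.

    from pathlib import Path
    from time import sleep

    import pandas as pd

    MAX_TRIES = 10
    DELAY = 0.3


    def atomic_store(df: pd.DataFrame, filename: Path) -> None:
        # Write to a .tmp file so readers never see a half-written .csv
        df.to_csv(filename.with_suffix('.tmp'))

        # Drop the stale target, then publish the new file via rename
        filename.with_suffix('.csv').unlink(missing_ok=True)
        filename.with_suffix('.tmp').rename(filename.with_suffix('.csv'))


    def patient_read(filename: Path) -> pd.DataFrame:
        # Phase 1: wait until no writer has a .tmp file in flight
        for _ in range(MAX_TRIES):
            if not any(filename.parent.glob('*.tmp')):
                break
            sleep(DELAY)

        # Phase 2: retry reads that land between unlink() and rename()
        for _ in range(MAX_TRIES):
            try:
                return pd.read_csv(filename.with_suffix('.csv'),
                                   header=0, index_col=0)
            except (FileNotFoundError, pd.errors.EmptyDataError):
                sleep(DELAY)
        raise TimeoutError(
            f"{filename} was not readable after {MAX_TRIES} tries")

Between `unlink()` and `rename()` the target briefly does not exist, so a reader polling at the wrong moment can see either a missing file or, with a slower writer, an empty one; those are the two exceptions handled above, mirroring the `pd.errors.EmptyDataError` retry loop that patch 16 adds to `_from_file_attempt`.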