Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor Suggestions I] Mixins all the way #19

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ classifiers = [
]

[project.scripts]
nwb_benchmarks = "nwb_benchmarks.command_line_interface:main"
nwb_benchmarks = "nwb_benchmarks._command_line_interface:_asv_wrapper"

[tool.black]
line-length = 120
Expand Down
1 change: 0 additions & 1 deletion src/nwb_benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
from .command_line_interface import main
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys


def main():
def _asv_wrapper():
"""Simple wrapper around `asv run` for conveneience."""
# TODO: swap to click
if len(sys.argv) <= 1:
Expand Down Expand Up @@ -46,4 +46,4 @@ def main():


if __name__ == "__main__":
main()
_asv_wrapper()
71 changes: 18 additions & 53 deletions src/nwb_benchmarks/benchmarks/streaming.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,45 @@
"""Basic benchmarks for NWB."""
import warnings

from .streaming_base import (
ElectricalSeriesStreamingFsspecBase,
ElectricalSeriesStreamingRemfileBase,
ElectricalSeriesStreamingROS3Base,
FileReadStreamingBase,
"""Basic benchmarks for stream NWB files and their contents."""
from ..core import (
AcquisitionTimeSeriesSlice,
FsspecNwbFileRead,
RemfileNwbFileRead,
RemoteNwbFileReadBenchmark,
Ros3NwbFileRead,
TimeSliceRequestBenchmark,
)

# Useful if running in verbose mode
warnings.filterwarnings(action="ignore", message="No cached namespaces found in .*")
warnings.filterwarnings(action="ignore", message="Ignoring cached namespace .*")


class FileReadStreaming(FileReadStreamingBase):
"""A basic benchmark for streaming an NWB file from the DANDI archive."""

class FsspecFileReadBenchmark(RemoteNwbFileReadBenchmark, FsspecNwbFileRead):
repeat = 1
s3_url = "https://dandiarchive.s3.amazonaws.com/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"

def time_fsspec_no_cache(self):
self.fsspec_no_cache()

def time_ros3(self):
self.ros3()

def time_remfile(self):
self.remfile()
class Ros3FileReadBenchmark(RemoteNwbFileReadBenchmark, Ros3NwbFileRead):
repeat = 1
s3_url = "https://dandiarchive.s3.amazonaws.com/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"


class ElectricalSeriesStreamingROS3(ElectricalSeriesStreamingROS3Base):
"""
A basic benchmark for streaming raw ecephys data.
class RemfileFileReadBenchmark(RemoteNwbFileReadBenchmark, RemfileNwbFileRead):
repeat = 1
s3_url = "https://dandiarchive.s3.amazonaws.com/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"

Needs separate setup per class to only time slicing operation.
"""

class ElectricalSeriesStreamingROS3(TimeSliceRequestBenchmark, Ros3NwbFileRead, AcquisitionTimeSeriesSlice):
repeat = 1
s3_url = "s3://dandiarchive/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"
acquisition_path = "ElectricalSeriesAp"
slice_range = (slice(0, 30_000), slice(0, 384)) # ~23 MB

def time_slice_request(self):
"""Time for the slice_request test case"""
self.slice_request()


class ElectricalSeriesStreamingFsspec(ElectricalSeriesStreamingFsspecBase):
"""
A basic benchmark for streaming raw ecephys data.

Needs separate setup per class to only time slicing operation.
"""

class ElectricalSeriesStreamingFsspec(TimeSliceRequestBenchmark, FsspecNwbFileRead, AcquisitionTimeSeriesSlice):
repeat = 1
s3_url = "https://dandiarchive.s3.amazonaws.com/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"
acquisition_path = "ElectricalSeriesAp"
slice_range = (slice(0, 30_000), slice(0, 384)) # ~23 MB

def time_slice_request(self):
"""Time for the slice_request test case"""
self.slice_request()


class ElectricalSeriesStreamingRemfile(ElectricalSeriesStreamingRemfileBase):
"""
A basic benchmark for streaming raw ecephys data.

Needs separate setup per class to only time slicing operation.
"""

class ElectricalSeriesStreamingRemfile(TimeSliceRequestBenchmark, RemfileNwbFileRead, AcquisitionTimeSeriesSlice):
repeat = 1
s3_url = "https://dandiarchive.s3.amazonaws.com/blobs/8c5/65f/8c565f28-e5fc-43fe-8fb7-318ad2081319"
acquisition_path = "ElectricalSeriesAp"
slice_range = (slice(0, 30_000), slice(0, 384)) # ~23 MB

def time_slice_request(self):
"""Time for the slice_request test case"""
self.slice_request()
141 changes: 0 additions & 141 deletions src/nwb_benchmarks/benchmarks/streaming_base.py
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that the main benchmarks folder is for things ASV will actually point to; base class definitions should probably be outside of this

Analogous to having a src/testing submodule that lives outside of an outer tests folder when using pytest

This file was deleted.

4 changes: 4 additions & 0 deletions src/nwb_benchmarks/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._nwbfile_read_base import FsspecNwbFileRead, RemfileNWBFileRead, Ros3NwbFileRead
from ._nwbfile_read_benchmarks import RemoteNwbFileReadBenchmark
from ._time_series_base import AcquisitionTimeSeriesSlice
from ._time_series_benchmarks import TimeSliceRequestBenchmark
63 changes: 63 additions & 0 deletions src/nwb_benchmarks/core/_nwbfile_read_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import abc
import warnings

import fsspec
import h5py
import pynwb
import remfile
from fsspec.asyn import reset_lock

warnings.filterwarnings(action="ignore", message="No cached namespaces found in .*")
warnings.filterwarnings(action="ignore", message="Ignoring cached namespace .*")


class NwbFileReadBase(abc.ABC):
"""
Base definition of a `read_nwbfile` method using a particular strategy.

Can be used by child classes for both setup and direct benchmarking.

The result of calling the `read_nwbfile` method is an instance attribute, "nwbfile", as an open file handle.

Child classes need to specify:
- the protocol to use to read the file by overridding the abstract `read_nwbfile`.
"""

@abc.abstractmethod
def read_nwbfile(self) -> None:
raise NotImplementedError


class RemoteNwbFileReadBase(NwbFileReadBase):
s3_url: str = None # S3 URL of the NWB asset

def setup(self):
assert self.s3_url is not None, "Test must set s3_url class variable"


class FsspecNwbFileRead(RemoteNwbFileReadBase):
def read_nwbfile(self) -> None:
reset_lock()
fsspec.get_filesystem_class("https").clear_instance_cache()

filesystem = fsspec.filesystem("https")

self.byte_stream = filesystem.open(path=self.s3_url, mode="rb")
self.file = h5py.File(name=self.byte_stream)
self.io = pynwb.NWBHDF5IO(file=self.file, load_namespaces=True)
self.nwbfile = self.io.read()


class Ros3NwbFileRead(RemoteNwbFileReadBase):
def read_nwbfile(self) -> None:
ros3_form = self.s3_url.replace("https://dandiarchive.s3.amazonaws.com", "s3://dandiarchive")
self.io = pynwb.NWBHDF5IO(path=ros3_form, mode="r", load_namespaces=True, driver="ros3")
self.nwbfile = self.io.read()


class RemfileNWBFileRead(RemoteNwbFileReadBase):
def read_nwbfile(self) -> None:
self.byte_stream = remfile.File(url=self.s3_url)
self.file = h5py.File(name=self.byte_stream)
self.io = pynwb.NWBHDF5IO(file=self.file, load_namespaces=True)
self.nwbfile = self.io.read()
8 changes: 8 additions & 0 deletions src/nwb_benchmarks/core/_nwbfile_read_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class RemoteNwbFileReadBenchmark:
"""Must be mixed in with a NwbFileReadBase child."""

def time_remote_nwbfile_read(self):
self.read_nwbfile()

def peakmem_remote_nwbfile_read(self):
self.read_nwbfile()
41 changes: 41 additions & 0 deletions src/nwb_benchmarks/core/_time_series_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import abc
import typing


class SliceRequestBase(abc.ABC):
slice_range: typing.Tuple[slice, int] # data selection to apply

@abc.abstractmethod
def slice_request(self):
raise NotImplementedError


class AcquisitionTimeSeriesSlice(SliceRequestBase):
"""
Define test case for slicing a TimeSeries located in the acquisition module.

Child classes must set:
- self.nwb_file : NWBFile object to use. Usually set in the setup function
- acquisition_path: class variable with name of object in nwbfile.acquisition
- slice_range: class variable with data selection to apply

Child classes must specify the performance metrics to use for the test cases by
specifying the corresponding test. E.g.:

.. code-block:: python

def time_slice_request(self):
self.slice_request()
"""

acquisition_path: str # name of object in nwbfile.acquisition

def slice_request(self):
"""
Definition of how to perform the slicing operation.

Output is stored in _temp attribute to avoid any benchmarks also including the garbage collection.
"""
self._temp = self.nwbfile.acquisition[self.acquisition_path].data[self.slice_range]

# TODO: add more tracking functions here
Loading
Loading