diff --git a/HISTORY.rst b/HISTORY.rst index 8290b57..4554113 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,7 @@ History X.Y.Z (YYYY-MM-DD) ------------------ +* Open FITS files as memory-mapped on local file systems (:pr:`24`) * Remove obsolete logger (:pr:`23`) * Support lists of fits files (:pr:`21`) * Test stacking in the globbing case (:pr:`20`) diff --git a/tests/test_xarrayfits.py b/tests/test_xarrayfits.py index 451b566..11f8579 100644 --- a/tests/test_xarrayfits.py +++ b/tests/test_xarrayfits.py @@ -4,6 +4,7 @@ """Tests for `xarrayfits` package.""" from contextlib import ExitStack +import mmap import os.path from astropy.io import fits @@ -14,6 +15,7 @@ import xarray from xarrayfits import xds_from_fits +from xarrayfits.fits_proxy import FitsProxy @pytest.fixture(scope="session") @@ -200,3 +202,34 @@ def test_distributed(beam_cube): expected = np.arange(np.prod(xds.hdu0.shape)).reshape(xds.hdu0.shape) assert_array_equal(expected, xds.hdu0.data) assert xds.hdu0.data.chunks == ((100, 100, 57), (100, 100, 57), (15, 15, 2)) + + +def test_memory_mapped(beam_cube): + with fits.open(beam_cube, memmap=True) as hdu_list: + hdu_list[0].data[:] + astropy_file = hdu_list.fileinfo(0)["file"] + assert isinstance(astropy_file._mmap, mmap.mmap) + + proxy = FitsProxy(beam_cube) + assert proxy.is_memory_mapped + proxy.hdu_list[0].data[:] + astropy_file = proxy.hdu_list.fileinfo(0)["file"] + assert isinstance(astropy_file._mmap, mmap.mmap) + + proxy = FitsProxy(beam_cube, memmap=True) + assert proxy.is_memory_mapped + proxy.hdu_list[0].data[:] + astropy_file = proxy.hdu_list.fileinfo(0)["file"] + assert isinstance(astropy_file._mmap, mmap.mmap) + + proxy = FitsProxy(beam_cube, memmap=None) + assert proxy.is_memory_mapped + proxy.hdu_list[0].data[:] + astropy_file = proxy.hdu_list.fileinfo(0)["file"] + assert isinstance(astropy_file._mmap, mmap.mmap) + + proxy = FitsProxy(beam_cube, memmap=False) + assert not proxy.is_memory_mapped + proxy.hdu_list[0].data[:] + astropy_file = proxy.hdu_list.fileinfo(0)["file"] + assert astropy_file._mmap is None diff --git a/xarrayfits/fits.py b/xarrayfits/fits.py index 7b1f73a..e4518f3 100644 --- a/xarrayfits/fits.py +++ b/xarrayfits/fits.py @@ -12,6 +12,7 @@ import dask import dask.array as da import fsspec +from fsspec.implementations.local import LocalFileSystem import numpy as np import xarray as xr @@ -58,8 +59,16 @@ def slices(r): return (slice(s, e) for s, e in zip(r[:-1], r[1:])) -def _get_data_function(fp, h, i, dt): - data = fp.hdu_list[h].section[i] +# https://docs.astropy.org/en/stable/io/fits/index.html#working-with-large-files +# https://docs.astropy.org/en/stable/io/fits/index.html#working-with-remote-and-cloud-hosted-files + + +def _get_data_function(fits_proxy, h, i, dt): + if fits_proxy.is_memory_mapped: + data = fits_proxy.hdu_list[h].data[i] + else: + data = fits_proxy.hdu_list[h].section[i] + return data.astype(dt.newbyteorder("=")) @@ -213,8 +222,10 @@ def xds_from_fits(fits_filename, hdus=None, prefix="hdu", chunks=None): datasets = [] - for filename in (f.path for f in openfiles): - fits_proxy = FitsProxy(filename, use_fsspec=True) + for of in openfiles: + fits_proxy = FitsProxy( + of.full_name, use_fsspec=True, memmap=isinstance(of.fs, LocalFileSystem) + ) # Take all hdus if None specified if hdus is None: diff --git a/xarrayfits/fits_proxy.py b/xarrayfits/fits_proxy.py index 298db55..5fe12ea 100644 --- a/xarrayfits/fits_proxy.py +++ b/xarrayfits/fits_proxy.py @@ -45,6 +45,10 @@ def __init__(self, filename, **kwargs): def from_reduce_args(filename, kw): return FitsProxy(filename, **kw) + @property + def is_memory_mapped(self): + return self._kwargs.get("memmap") in {None, True} + @property def hdu_list(self): try: