From 8bc011e15f7ca1459207464581e44fa98231d96b Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Thu, 24 Aug 2023 15:30:41 +0100 Subject: [PATCH] Clean up NMR utils and add tests for Bruker reader --- pydatalab/pydatalab/apps/nmr/utils.py | 34 +++++++++++------ pydatalab/tests/apps/test_nmr.py | 54 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 pydatalab/tests/apps/test_nmr.py diff --git a/pydatalab/pydatalab/apps/nmr/utils.py b/pydatalab/pydatalab/apps/nmr/utils.py index b2288f424..89ec8e41f 100644 --- a/pydatalab/pydatalab/apps/nmr/utils.py +++ b/pydatalab/pydatalab/apps/nmr/utils.py @@ -1,6 +1,7 @@ import itertools import os import re +from pathlib import Path import matplotlib.pyplot as plt import nmrglue as ng @@ -13,15 +14,27 @@ ###################################################################################### -def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None): +def read_bruker_1d( + data: Path | pd.DataFrame, + process_number: int = 1, + verbose: bool = False, + sample_mass_mg: float | None = None, +) -> tuple[pd.DataFrame | None, dict, str | None, tuple[int, ...]]: """Read a 1D bruker nmr spectrum and return it as a df. - arguments: + Parameters: + data: The directory of the full bruker data file, or a pandas DataFrame which + will be returned without further processing. + process_number: The process number of the processed data you want to plot [default: 1]. + verbose: Whether to print information such as the spectrum title to stdout. + sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have an "intensity_per_scan_per_gram" column. + + Returns: + df: A pandas DataFrame containing the spectrum data, or None if the reading failed. + a_dic: A dictionary containing the acquisition parameters. + topspin_title: The title of the spectrum, as stored in the topspin "title" file. + shape: The shape of the spectrum data array. 
- data: The directory of the full bruker data file. You may also supply a df as this argument. In this case, the df is returned as is. - process_number: The process number of the processed data you want to plot [default 1] - verbose: Whether to print information such as the spectrum title to stdout (default True) - sample_mass_mg: The (optional) sample mass. If provided, the resulting DataFrame will have a "intensity_per_scan_per_gram" column. """ # if df is provided, just return it as-is. This functionality is provided to make functions calling read_bruker_1d flexible by default. @@ -32,12 +45,12 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None): print("data frame provided to read_bruker_1d(). Returning it as is.") return data else: - data_dir = data + data_dir = Path(data) - processed_data_dir = os.path.join(data_dir, "pdata", str(process_number)) + processed_data_dir = data_dir / "pdata" / str(process_number) - a_dic, a_data = ng.fileio.bruker.read(data_dir) # aquisition_data - p_dic, p_data = ng.fileio.bruker.read_pdata(processed_data_dir) # processing data + a_dic, a_data = ng.fileio.bruker.read(str(data_dir)) # acquisition_data + p_dic, p_data = ng.fileio.bruker.read_pdata(str(processed_data_dir)) # processing data try: with open(os.path.join(processed_data_dir, "title"), "r") as f: @@ -46,7 +59,6 @@ def read_bruker_1d(data, process_number=1, verbose=True, sample_mass_mg=None): topspin_title = None if len(p_data.shape) > 1: - print("data is more than one dimensional - read failed") return None, a_dic, topspin_title, p_data.shape nscans = a_dic["acqus"]["NS"] diff --git a/pydatalab/tests/apps/test_nmr.py b/pydatalab/tests/apps/test_nmr.py new file mode 100644 index 000000000..f9be2b30f --- /dev/null +++ b/pydatalab/tests/apps/test_nmr.py @@ -0,0 +1,54 @@ +import zipfile +from pathlib import Path + +import pytest + +from pydatalab.apps.nmr.utils import read_bruker_1d + + +def _extract_example(filename, dir): + with 
zipfile.ZipFile(filename, "r") as zip_ref: + zip_ref.extractall(dir) + return Path(dir) / filename.stem + + +@pytest.fixture(scope="function") +def nmr_1d_solution_example(tmpdir): + zip_path = Path(__file__).parent.parent.parent / "example_data" / "NMR" / "1.zip" + return _extract_example(zip_path, tmpdir) + + +@pytest.fixture(scope="function") +def nmr_1d_solid_example(tmpdir): + zip_path = Path(__file__).parent.parent.parent / "example_data" / "NMR" / "71.zip" + return _extract_example(zip_path, tmpdir) + + +@pytest.fixture(scope="function") +def nmr_2d_matpass_example(tmpdir): + zip_path = Path(__file__).parent.parent.parent / "example_data" / "NMR" / "72.zip" + return _extract_example(zip_path, tmpdir) + + +def test_bruker_reader_solution(nmr_1d_solution_example): + df, a_dic, topspin_title, shape = read_bruker_1d(nmr_1d_solution_example) + assert df is not None + assert a_dic + assert topspin_title + assert shape == (4096,) + + +def test_bruker_reader_solid(nmr_1d_solid_example): + df, a_dic, topspin_title, shape = read_bruker_1d(nmr_1d_solid_example) + assert df is not None + assert a_dic + assert topspin_title + assert shape == (9984,) + + +def test_bruker_reader_2D(nmr_2d_matpass_example): + df, a_dic, topspin_title, shape = read_bruker_1d(nmr_2d_matpass_example) + assert df is None + assert a_dic + assert topspin_title + assert shape == (8, 4096)